; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sext i16 %b to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r4 = sext i16 %b to i64
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r5 = sext i32 %c to i64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %r0, i16* undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r1, i32* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r2, i64* undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r3, i32* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r4, i64* undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r5, i64* undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %r0, ptr undef, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r1, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r2, ptr undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r3, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r4, ptr undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r5, ptr undef, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r6 = zext i8 %a to i16
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = zext i8 %a to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = zext i8 %a to i64
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %b to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r10 = zext i16 %b to i64
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r11 = zext i32 %c to i64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %r6, i16* undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r7, i32* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r8, i64* undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r9, i32* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r10, i64* undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r11, i64* undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %r6, ptr undef, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r7, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r8, ptr undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r9, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r10, ptr undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r11, ptr undef, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r12 = trunc i64 %d to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r13 = trunc i64 %d to i16
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r14 = trunc i64 %d to i8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r23 = zext i1 %e to i16
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r24 = zext i1 %e to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r25 = zext i1 %e to i64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %r18, i8* undef, align 1
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %r19, i16* undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r20, i32* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r21, i64* undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %r22, i8* undef, align 1
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %r23, i16* undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r24, i32* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r25, i64* undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %r18, ptr undef, align 1
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %r19, ptr undef, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r20, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r21, ptr undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %r22, ptr undef, align 1
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %r23, ptr undef, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r24, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %r25, ptr undef, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r12
;
%r0 = sext i8 %a to i16
%r3 = sext i16 %b to i32
%r4 = sext i16 %b to i64
%r5 = sext i32 %c to i64
- store i16 %r0, i16* undef
- store i32 %r1, i32* undef
- store i64 %r2, i64* undef
- store i32 %r3, i32* undef
- store i64 %r4, i64* undef
- store i64 %r5, i64* undef
+ store i16 %r0, ptr undef
+ store i32 %r1, ptr undef
+ store i64 %r2, ptr undef
+ store i32 %r3, ptr undef
+ store i64 %r4, ptr undef
+ store i64 %r5, ptr undef
%r6 = zext i8 %a to i16
%r7 = zext i8 %a to i32
%r9 = zext i16 %b to i32
%r10 = zext i16 %b to i64
%r11 = zext i32 %c to i64
- store i16 %r6, i16* undef
- store i32 %r7, i32* undef
- store i64 %r8, i64* undef
- store i32 %r9, i32* undef
- store i64 %r10, i64* undef
- store i64 %r11, i64* undef
+ store i16 %r6, ptr undef
+ store i32 %r7, ptr undef
+ store i64 %r8, ptr undef
+ store i32 %r9, ptr undef
+ store i64 %r10, ptr undef
+ store i64 %r11, ptr undef
%r12 = trunc i64 %d to i32
%r13 = trunc i64 %d to i16
%r23 = zext i1 %e to i16
%r24 = zext i1 %e to i32
%r25 = zext i1 %e to i64
- store i8 %r18, i8* undef
- store i16 %r19, i16* undef
- store i32 %r20, i32* undef
- store i64 %r21, i64* undef
- store i8 %r22, i8* undef
- store i16 %r23, i16* undef
- store i32 %r24, i32* undef
- store i64 %r25, i64* undef
+ store i8 %r18, ptr undef
+ store i16 %r19, ptr undef
+ store i32 %r20, ptr undef
+ store i64 %r21, ptr undef
+ store i8 %r22, ptr undef
+ store i16 %r23, ptr undef
+ store i32 %r24, ptr undef
+ store i64 %r25, ptr undef
ret i32 %r12
}
define i32 @load_extends() #0 {
; CHECK-LABEL: 'load_extends'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadnxv2i32 = load <vscale x 2 x i32>, <vscale x 2 x i32>* undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadnxv4i32 = load <vscale x 4 x i32>, <vscale x 4 x i32>* undef, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, ptr undef, align 1
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, ptr undef, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv2i8 = load <2 x i8>, ptr undef, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = load <4 x i8>, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, ptr undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv2i16 = load <2 x i16>, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, ptr undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, ptr undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, ptr undef, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadnxv2i32 = load <vscale x 2 x i32>, ptr undef, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadnxv4i32 = load <vscale x 4 x i32>, ptr undef, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = zext <vscale x 4 x i32> %loadnxv4i32 to <vscale x 4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %loadi8 = load i8, i8* undef
- %loadi16 = load i16, i16* undef
- %loadi32 = load i32, i32* undef
- %loadv2i8 = load <2 x i8>, <2 x i8>* undef
- %loadv4i8 = load <4 x i8>, <4 x i8>* undef
- %loadv8i8 = load <8 x i8>, <8 x i8>* undef
- %loadv2i16 = load <2 x i16>, <2 x i16>* undef
- %loadv4i16 = load <4 x i16>, <4 x i16>* undef
- %loadv2i32 = load <2 x i32>, <2 x i32>* undef
- %loadv4i32 = load <4 x i32>, <4 x i32>* undef
- %loadnxv2i32 = load <vscale x 2 x i32>, <vscale x 2 x i32>* undef
- %loadnxv4i32 = load <vscale x 4 x i32>, <vscale x 4 x i32>* undef
+ %loadi8 = load i8, ptr undef
+ %loadi16 = load i16, ptr undef
+ %loadi32 = load i32, ptr undef
+ %loadv2i8 = load <2 x i8>, ptr undef
+ %loadv4i8 = load <4 x i8>, ptr undef
+ %loadv8i8 = load <8 x i8>, ptr undef
+ %loadv2i16 = load <2 x i16>, ptr undef
+ %loadv4i16 = load <4 x i16>, ptr undef
+ %loadv2i32 = load <2 x i32>, ptr undef
+ %loadv4i32 = load <4 x i32>, ptr undef
+ %loadnxv2i32 = load <vscale x 2 x i32>, ptr undef
+ %loadnxv4i32 = load <vscale x 4 x i32>, ptr undef
%r0 = sext i8 %loadi8 to i16
%r1 = zext i8 %loadi8 to i16
define i32 @store_truncs() {
; CHECK-LABEL: 'store_truncs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = trunc i64 undef to i8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %r0, i8* undef, align 1
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %r0, ptr undef, align 1
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = trunc i64 undef to i16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %r1, i16* undef, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %r1, ptr undef, align 2
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = trunc i64 undef to i32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r2, i32* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %r2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = trunc i32 undef to i8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %r3, i8* undef, align 1
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %r3, ptr undef, align 1
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r4 = trunc i32 undef to i16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %r4, i16* undef, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %r4, ptr undef, align 2
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r5 = trunc i16 undef to i8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %r5, i8* undef, align 1
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %r5, ptr undef, align 1
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%r0 = trunc i64 undef to i8
- store i8 %r0, i8* undef
+ store i8 %r0, ptr undef
%r1 = trunc i64 undef to i16
- store i16 %r1, i16* undef
+ store i16 %r1, ptr undef
%r2 = trunc i64 undef to i32
- store i32 %r2, i32* undef
+ store i32 %r2, ptr undef
%r3 = trunc i32 undef to i8
- store i8 %r3, i8* undef
+ store i8 %r3, ptr undef
%r4 = trunc i32 undef to i16
- store i16 %r4, i16* undef
+ store i16 %r4, ptr undef
%r5 = trunc i16 undef to i8
- store i8 %r5, i8* undef
+ store i8 %r5, ptr undef
ret i32 undef
}
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %retval = getelementptr
-define <vscale x 16 x i8>* @gep_scalable_vector(<vscale x 16 x i8>* %ptr) {
- %retval = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %ptr, i32 2
- ret <vscale x 16 x i8>* %retval
+define ptr @gep_scalable_vector(ptr %ptr) {
+ %retval = getelementptr <vscale x 16 x i8>, ptr %ptr, i32 2
+ ret ptr %retval
}
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; Check that cost is 1 for unusual load to register sized load.
-define i32 @loadUnusualIntegerWithTrunc(i128* %ptr) {
+define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, i128* %ptr, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
%trunc = trunc i128 %out to i32
ret i32 %trunc
}
-define i128 @loadUnusualInteger(i128* %ptr) {
+define i128 @loadUnusualInteger(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualInteger'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, i128* %ptr, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, ptr %ptr, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
ret i128 %out
}
define void @fixed() {
; CHECK-LABEL: 'fixed'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 8, <2 x i1> undef, <2 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 8, <4 x i1> undef, <4 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 8, <8 x i1> undef, <8 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 8, <16 x i1> undef, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 8, <2 x i1> undef, <2 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 8, <4 x i1> undef, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 8, <8 x i1> undef, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 8, <2 x i1> undef, <2 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 8, <4 x i1> undef, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 8, <2 x i1> undef, <2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 8, <2 x i1> undef, <2 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 8, <4 x i1> undef, <4 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 8, <8 x i1> undef, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 8, <2 x i1> undef, <2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 8, <4 x i1> undef, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 8, <2 x i1> undef, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 8, <4 x i1> undef, <4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0v32f16(<32 x half>* undef, i32 8, <32 x i1> undef, <32 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
entry:
; Legal fixed-width integer types
- %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8> *undef, i32 8, <2 x i1> undef, <2 x i8> undef)
- %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8> *undef, i32 8, <4 x i1> undef, <4 x i8> undef)
- %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8> *undef, i32 8, <8 x i1> undef, <8 x i8> undef)
- %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8> *undef, i32 8, <16 x i1> undef, <16 x i8> undef)
- %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16> *undef, i32 8, <2 x i1> undef, <2 x i16> undef)
- %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16> *undef, i32 8, <4 x i1> undef, <4 x i16> undef)
- %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16> *undef, i32 8, <8 x i1> undef, <8 x i16> undef)
- %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32> *undef, i32 8, <2 x i1> undef, <2 x i32> undef)
- %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32> *undef, i32 8, <4 x i1> undef, <4 x i32> undef)
- %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64> *undef, i32 8, <2 x i1> undef, <2 x i64> undef)
+ %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef)
+ %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef)
+ %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef)
+ %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef)
+ %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef)
+ %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef)
+ %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef)
+ %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
+ %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
+ %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
; Legal fixed-width floating point types
- %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half> *undef, i32 8, <2 x i1> undef, <2 x half> undef)
- %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half> *undef, i32 8, <4 x i1> undef, <4 x half> undef)
- %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half> *undef, i32 8, <8 x i1> undef, <8 x half> undef)
- %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float> *undef, i32 8, <2 x i1> undef, <2 x float> undef)
- %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float> *undef, i32 8, <4 x i1> undef, <4 x float> undef)
- %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double> *undef, i32 8, <2 x i1> undef, <2 x double> undef)
+ %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
+ %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
+ %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
+ %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
+ %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
+ %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
; A couple of examples of illegal fixed-width types
- %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64> *undef, i32 8, <4 x i1> undef, <4 x i64> undef)
- %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0v32f16(<32 x half> *undef, i32 8, <32 x i1> undef, <32 x half> undef)
+ %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
+ %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
ret void
}
define void @scalable() {
; CHECK-LABEL: 'scalable'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0nxv2f16(<vscale x 2 x half>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0nxv4f16(<vscale x 4 x half>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64>* undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half>* undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0(ptr undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
entry:
; Legal scalable integer types
- %nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
- %nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
- %nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8> *undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
- %nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
- %nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
- %nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
- %nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16> *undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
- %nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
- %nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
- %nxv2i64 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+ %nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+ %nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+ %nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+ %nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+ %nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+ %nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+ %nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+ %nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+ %nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+ %nxv2i64 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
; Legal scalable floating point types
- %nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0nxv2f16(<vscale x 2 x half> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
- %nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0nxv4f16(<vscale x 4 x half> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
- %nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half> *undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
- %nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0nxv2f32(<vscale x 2 x float> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
- %nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
- %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+ %nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+ %nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+ %nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+ %nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+ %nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+ %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
; A couple of examples of illegal scalable types
- %nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64> *undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
- %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
- %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half> *undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+ %nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+ %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+ %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0(ptr undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
ret void
}
-declare <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>*, i32, <2 x i1>, <2 x i8>)
-declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>)
-declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
-declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
-declare <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>*, i32, <2 x i1>, <2 x i16>)
-declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>)
-declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
-declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
-declare <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>*, i32, <2 x i1>, <2 x i64>)
-declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>)
-declare <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>*, i32, <2 x i1>, <2 x half>)
-declare <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>*, i32, <4 x i1>, <4 x half>)
-declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>)
-declare <32 x half> @llvm.masked.load.v32f16.p0v32f16(<32 x half>*, i32, <32 x i1>, <32 x half>)
-declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
-declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
-declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
+declare <2 x i8> @llvm.masked.load.v2i8.p0(ptr, i32, <2 x i1>, <2 x i8>)
+declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32, <4 x i1>, <4 x i8>)
+declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>)
+declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>)
+declare <2 x i16> @llvm.masked.load.v2i16.p0(ptr, i32, <2 x i1>, <2 x i16>)
+declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>)
+declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>)
+declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>)
+declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>)
+declare <4 x i64> @llvm.masked.load.v4i64.p0(ptr, i32, <4 x i1>, <4 x i64>)
+declare <2 x half> @llvm.masked.load.v2f16.p0(ptr, i32, <2 x i1>, <2 x half>)
+declare <4 x half> @llvm.masked.load.v4f16.p0(ptr, i32, <4 x i1>, <4 x half>)
+declare <8 x half> @llvm.masked.load.v8f16.p0(ptr, i32, <8 x i1>, <8 x half>)
+declare <32 x half> @llvm.masked.load.v32f16.p0(ptr, i32, <32 x i1>, <32 x half>)
+declare <2 x float> @llvm.masked.load.v2f32.p0(ptr, i32, <2 x i1>, <2 x float>)
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)
+declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>)
-declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>*, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>*, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>*, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64>*, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
-declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64>*, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
-declare <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0nxv2f16(<vscale x 2 x half>*, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
-declare <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0nxv4f16(<vscale x 4 x half>*, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
-declare <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>*, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
-declare <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half>*, i32, <vscale x 32 x i1>, <vscale x 32 x half>)
-declare <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>*, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>*, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>*, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
+declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
+declare <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
+declare <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
+declare <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0(ptr, i32, <vscale x 32 x i1>, <vscale x 32 x half>)
+declare <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
+declare <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
define void @fixed_sve_vls() #0 {
; CHECK-LABEL: 'fixed_sve_vls'
-; CHECK: Cost Model: Found an estimated cost of [[#div(2047,VBITS)+1]] for instruction: %v256i8 = call <256 x i8> @llvm.masked.load.v256i8.p0v256i8(<256 x i8>* undef, i32 8, <256 x i1> undef, <256 x i8> undef)
-; CHECK: Cost Model: Found an estimated cost of [[#div(4091,VBITS)+1]] for instruction: %v256i16 = call <256 x i16> @llvm.masked.load.v256i16.p0v256i16(<256 x i16>* undef, i32 8, <256 x i1> undef, <256 x i16> undef)
-; CHECK: Cost Model: Found an estimated cost of [[#div(511,VBITS)+1]] for instruction: %v16i32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 8, <16 x i1> undef, <16 x i32> undef)
-; CHECK: Cost Model: Found an estimated cost of [[#div(1023,VBITS)+1]] for instruction: %v16i64 = call <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* undef, i32 8, <16 x i1> undef, <16 x i64> undef)
-; CHECK: Cost Model: Found an estimated cost of [[#div(8191,VBITS)+1]] for instruction: %v512f16 = call <512 x half> @llvm.masked.load.v512f16.p0v512f16(<512 x half>* undef, i32 8, <512 x i1> undef, <512 x half> undef)
-; CHECK: Cost Model: Found an estimated cost of [[#div(8191,VBITS)+1]] for instruction: %v256f32 = call <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float>* undef, i32 8, <256 x i1> undef, <256 x float> undef)
-; CHECK: Cost Model: Found an estimated cost of [[#div(8191,VBITS)+1]] for instruction: %v128f64 = call <128 x double> @llvm.masked.load.v128f64.p0v128f64(<128 x double>* undef, i32 8, <128 x i1> undef, <128 x double> undef)
+; CHECK: Cost Model: Found an estimated cost of [[#div(2047,VBITS)+1]] for instruction: %v256i8 = call <256 x i8> @llvm.masked.load.v256i8.p0(ptr undef, i32 8, <256 x i1> undef, <256 x i8> undef)
+; CHECK: Cost Model: Found an estimated cost of [[#div(4095,VBITS)+1]] for instruction: %v256i16 = call <256 x i16> @llvm.masked.load.v256i16.p0(ptr undef, i32 8, <256 x i1> undef, <256 x i16> undef)
+; CHECK: Cost Model: Found an estimated cost of [[#div(511,VBITS)+1]] for instruction: %v16i32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i32> undef)
+; CHECK: Cost Model: Found an estimated cost of [[#div(1023,VBITS)+1]] for instruction: %v16i64 = call <16 x i64> @llvm.masked.load.v16i64.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i64> undef)
+; CHECK: Cost Model: Found an estimated cost of [[#div(8191,VBITS)+1]] for instruction: %v512f16 = call <512 x half> @llvm.masked.load.v512f16.p0(ptr undef, i32 8, <512 x i1> undef, <512 x half> undef)
+; CHECK: Cost Model: Found an estimated cost of [[#div(8191,VBITS)+1]] for instruction: %v256f32 = call <256 x float> @llvm.masked.load.v256f32.p0(ptr undef, i32 8, <256 x i1> undef, <256 x float> undef)
+; CHECK: Cost Model: Found an estimated cost of [[#div(8191,VBITS)+1]] for instruction: %v128f64 = call <128 x double> @llvm.masked.load.v128f64.p0(ptr undef, i32 8, <128 x i1> undef, <128 x double> undef)
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: ret void
entry:
- %v256i8 = call <256 x i8> @llvm.masked.load.v256i8.p0v256i8(<256 x i8> *undef, i32 8, <256 x i1> undef, <256 x i8> undef)
- %v256i16 = call <256 x i16> @llvm.masked.load.v256i16.p0v256i16(<256 x i16> *undef, i32 8, <256 x i1> undef, <256 x i16> undef)
- %v16i32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32> *undef, i32 8, <16 x i1> undef, <16 x i32> undef)
- %v16i64 = call <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64> *undef, i32 8, <16 x i1> undef, <16 x i64> undef)
+ %v256i8 = call <256 x i8> @llvm.masked.load.v256i8.p0(ptr undef, i32 8, <256 x i1> undef, <256 x i8> undef)
+ %v256i16 = call <256 x i16> @llvm.masked.load.v256i16.p0(ptr undef, i32 8, <256 x i1> undef, <256 x i16> undef)
+ %v16i32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i32> undef)
+ %v16i64 = call <16 x i64> @llvm.masked.load.v16i64.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i64> undef)
- %v512f16 = call <512 x half> @llvm.masked.load.v512f16.p0v512f16(<512 x half> *undef, i32 8, <512 x i1> undef, <512 x half> undef)
- %v256f32 = call <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float> *undef, i32 8, <256 x i1> undef, <256 x float> undef)
- %v128f64 = call <128 x double> @llvm.masked.load.v128f64.p0v128f64(<128 x double> *undef, i32 8, <128 x i1> undef, <128 x double> undef)
+ %v512f16 = call <512 x half> @llvm.masked.load.v512f16.p0(ptr undef, i32 8, <512 x i1> undef, <512 x half> undef)
+ %v256f32 = call <256 x float> @llvm.masked.load.v256f32.p0(ptr undef, i32 8, <256 x i1> undef, <256 x float> undef)
+ %v128f64 = call <128 x double> @llvm.masked.load.v128f64.p0(ptr undef, i32 8, <128 x i1> undef, <128 x double> undef)
ret void
}
-declare <256 x i8> @llvm.masked.load.v256i8.p0v256i8(<256 x i8>*, i32, <256 x i1>, <256 x i8>)
-declare <256 x i16> @llvm.masked.load.v256i16.p0v256i16(<256 x i16>*, i32, <256 x i1>, <256 x i16>)
-declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
-declare <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>*, i32, <16 x i1>, <16 x i64>)
+declare <256 x i8> @llvm.masked.load.v256i8.p0(ptr, i32, <256 x i1>, <256 x i8>)
+declare <256 x i16> @llvm.masked.load.v256i16.p0(ptr, i32, <256 x i1>, <256 x i16>)
+declare <16 x i32> @llvm.masked.load.v16i32.p0(ptr, i32, <16 x i1>, <16 x i32>)
+declare <16 x i64> @llvm.masked.load.v16i64.p0(ptr, i32, <16 x i1>, <16 x i64>)
-declare <512 x half> @llvm.masked.load.v512f16.p0v512f16(<512 x half>*, i32, <512 x i1>, <512 x half>)
-declare <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float>*, i32, <256 x i1>, <256 x float>)
-declare <128 x double> @llvm.masked.load.v128f64.p0v128f64(<128 x double>*, i32, <128 x i1>, <128 x double>)
+declare <512 x half> @llvm.masked.load.v512f16.p0(ptr, i32, <512 x i1>, <512 x half>)
+declare <256 x float> @llvm.masked.load.v256f32.p0(ptr, i32, <256 x i1>, <256 x float>)
+declare <128 x double> @llvm.masked.load.v128f64.p0(ptr, i32, <128 x i1>, <128 x double>)
attributes #0 = { "target-features"="+sve" }
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
-define <16 x i8> @load16(<16 x i8>* %ptr) {
+define <16 x i8> @load16(ptr %ptr) {
; CHECK: function 'load16'
; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
- %out = load <16 x i8>, <16 x i8>* %ptr
+ %out = load <16 x i8>, ptr %ptr
ret <16 x i8> %out
}
-define void @store16(<16 x i8>* %ptr, <16 x i8> %val) {
+define void @store16(ptr %ptr, <16 x i8> %val) {
; CHECK: function 'store16'
; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
- store <16 x i8> %val, <16 x i8>* %ptr
+ store <16 x i8> %val, ptr %ptr
ret void
}
-define <8 x i8> @load8(<8 x i8>* %ptr) {
+define <8 x i8> @load8(ptr %ptr) {
; CHECK: function 'load8'
; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
- %out = load <8 x i8>, <8 x i8>* %ptr
+ %out = load <8 x i8>, ptr %ptr
ret <8 x i8> %out
}
-define void @store8(<8 x i8>* %ptr, <8 x i8> %val) {
+define void @store8(ptr %ptr, <8 x i8> %val) {
; CHECK: function 'store8'
; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
- store <8 x i8> %val, <8 x i8>* %ptr
+ store <8 x i8> %val, ptr %ptr
ret void
}
-define <4 x i8> @load4(<4 x i8>* %ptr) {
+define <4 x i8> @load4(ptr %ptr) {
; CHECK: function 'load4'
; CHECK-NEON: Cost Model: Found an estimated cost of 2 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 2 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
- %out = load <4 x i8>, <4 x i8>* %ptr
+ %out = load <4 x i8>, ptr %ptr
ret <4 x i8> %out
}
-define void @store4(<4 x i8>* %ptr, <4 x i8> %val) {
+define void @store4(ptr %ptr, <4 x i8> %val) {
; CHECK: function 'store4'
; CHECK-NEON: Cost Model: Found an estimated cost of 2 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 2 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
- store <4 x i8> %val, <4 x i8>* %ptr
+ store <4 x i8> %val, ptr %ptr
ret void
}
-define <16 x i16> @load_256(<16 x i16>* %ptr) {
+define <16 x i16> @load_256(ptr %ptr) {
; CHECK: function 'load_256'
; CHECK-NEON: Cost Model: Found an estimated cost of 2 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 2 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
- %out = load <16 x i16>, <16 x i16>* %ptr
+ %out = load <16 x i16>, ptr %ptr
ret <16 x i16> %out
}
-define <8 x i64> @load_512(<8 x i64>* %ptr) {
+define <8 x i64> @load_512(ptr %ptr) {
; CHECK: function 'load_512'
; CHECK-NEON: Cost Model: Found an estimated cost of 4 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 4 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 2 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
- %out = load <8 x i64>, <8 x i64>* %ptr
+ %out = load <8 x i64>, ptr %ptr
ret <8 x i64> %out
}
-declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32 immarg, <4 x i1>, <4 x i8>)
-define <4 x i8> @gather_load_4xi8_constant_mask(<4 x i8*> %ptrs) {
+declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x i8>)
+define <4 x i8> @gather_load_4xi8_constant_mask(<4 x ptr> %ptrs) {
; CHECK: gather_load_4xi8_constant_mask
-; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
+; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
;
- %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+ %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
ret <4 x i8> %lv
}
-define <4 x i8> @gather_load_4xi8_variable_mask(<4 x i8*> %ptrs, <4 x i1> %cond) {
+define <4 x i8> @gather_load_4xi8_variable_mask(<4 x ptr> %ptrs, <4 x i1> %cond) {
; CHECK: gather_load_4xi8_variable_mask
-; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
+; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
;
- %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
+ %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
ret <4 x i8> %lv
}
-declare void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8>, <4 x i8*>, i32 immarg, <4 x i1>)
-define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x i8*> %ptrs) {
+declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32 immarg, <4 x i1>)
+define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x ptr> %ptrs) {
; CHECK: scatter_store_4xi8_constant_mask
-; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
+; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(
;
- call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
ret void
}
; Cost-model test: masked scatter of <4 x i8> where the mask is the function
; argument %cond rather than a constant. The -/+ pairs only switch <4 x i8*>
; to <4 x ptr> and the intrinsic suffix v4p0i8 to v4p0. The expected costs are
; unchanged: 29 under the NEON and SVE-128 prefixes, 40 under the SVE-256 and
; SVE-512 prefixes.
-define void @scatter_store_4xi8_variable_mask(<4 x i8> %val, <4 x i8*> %ptrs, <4 x i1> %cond) {
+define void @scatter_store_4xi8_variable_mask(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %cond) {
; CHECK: scatter_store_4xi8_variable_mask
-; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
+; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(
;
- call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> %cond)
+ call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
ret void
}
; Cost-model test: masked gather of <4 x i32> with an all-true constant mask
; and an undef passthrough operand. The -/+ pairs only replace <4 x i32*> with
; <4 x ptr> and the intrinsic suffix v4p0i32 with v4p0. The expected costs are
; unchanged: 17 under the NEON and SVE-128 prefixes, 40 under the SVE-256 and
; SVE-512 prefixes.
-declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32 immarg, <4 x i1>, <4 x i32>)
-define <4 x i32> @gather_load_4xi32_constant_mask(<4 x i32*> %ptrs) {
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x i32>)
+define <4 x i32> @gather_load_4xi32_constant_mask(<4 x ptr> %ptrs) {
; CHECK: gather_load_4xi32_constant_mask
-; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
+; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
;
- %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %lv
}
; Cost-model test: masked gather of <4 x i32> where the mask is the function
; argument %cond and the passthrough operand is undef. The -/+ pairs only
; replace <4 x i32*> with <4 x ptr> and the intrinsic suffix v4p0i32 with v4p0.
; The expected costs are unchanged: 29 under the NEON and SVE-128 prefixes,
; 40 under the SVE-256 and SVE-512 prefixes.
-define <4 x i32> @gather_load_4xi32_variable_mask(<4 x i32*> %ptrs, <4 x i1> %cond) {
+define <4 x i32> @gather_load_4xi32_variable_mask(<4 x ptr> %ptrs, <4 x i1> %cond) {
; CHECK: gather_load_4xi32_variable_mask
-; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
+; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
;
- %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
+ %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
ret <4 x i32> %lv
}
; Cost-model test: masked scatter of <4 x i32> with an all-true constant mask.
; The -/+ pairs only replace <4 x i32*> with <4 x ptr> and the intrinsic
; suffix v4p0i32 with v4p0. The expected costs are unchanged: 17 under the
; NEON and SVE-128 prefixes, 40 under the SVE-256 and SVE-512 prefixes.
-declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32 immarg, <4 x i1>)
-define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x i32*> %ptrs) {
+declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32 immarg, <4 x i1>)
+define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x ptr> %ptrs) {
; CHECK: scatter_store_4xi32_constant_mask
-; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
+; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(
;
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
ret void
}
; Cost-model test: masked scatter of <4 x i32> where the mask is the function
; argument %cond. The -/+ pairs only replace <4 x i32*> with <4 x ptr> and the
; intrinsic suffix v4p0i32 with v4p0. The expected costs are unchanged: 29
; under the NEON and SVE-128 prefixes, 40 under the SVE-256 and SVE-512
; prefixes.
-define void @scatter_store_4xi32_variable_mask(<4 x i32> %val, <4 x i32*> %ptrs, <4 x i1> %cond) {
+define void @scatter_store_4xi32_variable_mask(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %cond) {
; CHECK: scatter_store_4xi32_variable_mask
-; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
+; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(
;
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 1, <4 x i1> %cond)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
ret void
}
; Cost-model test: masked gather of <256 x i16> from undef pointers, with the
; mask taken from the function argument and a zeroinitializer passthrough.
; The -/+ pairs replace <256 x i16*> with <256 x ptr> and the intrinsic suffix
; v256p0i16 with v256p0. The expected costs are unchanged: 1952 under the NEON
; and SVE-128 prefixes, 2560 under the SVE-256 and SVE-512 prefixes.
; The label directive previously named 'sve_scatter_vls' (copied from the
; scatter test further down), so it could not anchor to this function's
; output; it now names this function.
-declare <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*>, i32, <256 x i1>, <256 x i16>)
+declare <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr>, i32, <256 x i1>, <256 x i16>)
define void @sve_gather_vls(<256 x i1> %v256i1mask) {
-; CHECK-LABEL: 'sve_scatter_vls'
+; CHECK-LABEL: 'sve_gather_vls'
-; CHECK-NEON: Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-NEON: Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
entry:
- %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+ %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
ret void
}
; Cost-model test: masked gather of <256 x float> from undef pointers, with
; the mask taken from the function argument and a zeroinitializer passthrough.
; The -/+ pairs replace <256 x float*> with <256 x ptr> and the intrinsic
; suffix v256p0f32 with v256p0. The expected costs are unchanged: 1856 under
; the NEON and SVE-128 prefixes, 2560 under the SVE-256 and SVE-512 prefixes.
-declare <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*>, i32, <256 x i1>, <256 x float>)
+declare <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr>, i32, <256 x i1>, <256 x float>)
define void @sve_gather_vls_float(<256 x i1> %v256i1mask) {
; CHECK-LABEL: 'sve_gather_vls_float'
-; CHECK-NEON: Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-NEON: Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
entry:
- %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+ %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
ret void
}
; Cost-model test: masked scatter of an undef <256 x i8> value to undef
; pointers, with the mask taken from the function argument. The -/+ pairs
; replace <256 x i8*> with <256 x ptr> and the intrinsic suffix v256p0i8 with
; v256p0. The expected costs are unchanged: 2000 under the NEON and SVE-128
; prefixes, 2560 under the SVE-256 and SVE-512 prefixes.
-declare void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8>, <256 x i8*>, i32, <256 x i1>)
+declare void @llvm.masked.scatter.v256i8.v256p0(<256 x i8>, <256 x ptr>, i32, <256 x i1>)
define void @sve_scatter_vls(<256 x i1> %v256i1mask){
; CHECK-LABEL: 'sve_scatter_vls'
-; CHECK-NEON: Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 2560 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 2560 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-NEON: Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 2560 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 2560 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
entry:
- call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
+ call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
ret void
}
; Cost-model test: masked scatter of an undef <512 x half> value to undef
; pointers, with the mask taken from the function argument. The -/+ pairs
; replace <512 x half*> with <512 x ptr> and the intrinsic suffix v512p0f16
; with v512p0. The expected costs are unchanged: 3904 under the NEON and
; SVE-128 prefixes, 5120 under the SVE-256 and SVE-512 prefixes.
-declare void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half>, <512 x half*>, i32, <512 x i1>)
+declare void @llvm.masked.scatter.v512f16.v512p0(<512 x half>, <512 x ptr>, i32, <512 x i1>)
define void @sve_scatter_vls_float(<512 x i1> %v512i1mask){
; CHECK-LABEL: 'sve_scatter_vls_float'
-; CHECK-NEON: Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 5120 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 5120 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
- call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-NEON: Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 5120 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 5120 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
+ call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
ret void
}
; The unlegalized 256-bit stores are further penalized when legalized down
; to 128-bit stores.
; CHECK-LABEL: 'getMemoryOpCost'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x half> undef, <16 x half>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> undef, <8 x half>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load <2 x i8>, <2 x i8>* undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <4 x i8>, <4 x i8>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x half> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i8> undef, ptr undef, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> undef, ptr undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load <2 x i8>, ptr undef, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <4 x i8>, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SIZE-LABEL: 'getMemoryOpCost'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x half> undef, <16 x half>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> undef, <8 x half>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 2
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 4
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <2 x i8>, <2 x i8>* undef, align 2
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load <4 x i8>, <4 x i8>* undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x half> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 2
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 4
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <2 x i8>, ptr undef, align 2
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load <4 x i8>, ptr undef, align 4
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLOW_MISALIGNED_128_STORE-LABEL: 'getMemoryOpCost'
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <16 x half> undef, <16 x half>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <8 x half> undef, <8 x half>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 2
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 4
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load <2 x i8>, <2 x i8>* undef, align 2
-; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <4 x i8>, <4 x i8>* undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <4 x i64> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <8 x i32> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <16 x i16> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <32 x i8> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <4 x double> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <8 x float> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: store <16 x half> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <2 x i64> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <4 x i32> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <8 x i16> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <16 x i8> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <2 x double> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <4 x float> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <8 x half> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i8> undef, ptr undef, align 2
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> undef, ptr undef, align 4
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load <2 x i8>, ptr undef, align 2
+; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <4 x i8>, ptr undef, align 4
; SLOW_MISALIGNED_128_STORE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- store <4 x i64> undef, <4 x i64> * undef
- store <8 x i32> undef, <8 x i32> * undef
- store <16 x i16> undef, <16 x i16> * undef
- store <32 x i8> undef, <32 x i8> * undef
+ store <4 x i64> undef, ptr undef
+ store <8 x i32> undef, ptr undef
+ store <16 x i16> undef, ptr undef
+ store <32 x i8> undef, ptr undef
- store <4 x double> undef, <4 x double> * undef
- store <8 x float> undef, <8 x float> * undef
- store <16 x half> undef, <16 x half> * undef
+ store <4 x double> undef, ptr undef
+ store <8 x float> undef, ptr undef
+ store <16 x half> undef, ptr undef
- store <2 x i64> undef, <2 x i64> * undef
- store <4 x i32> undef, <4 x i32> * undef
- store <8 x i16> undef, <8 x i16> * undef
- store <16 x i8> undef, <16 x i8> * undef
+ store <2 x i64> undef, ptr undef
+ store <4 x i32> undef, ptr undef
+ store <8 x i16> undef, ptr undef
+ store <16 x i8> undef, ptr undef
- store <2 x double> undef, <2 x double> * undef
- store <4 x float> undef, <4 x float> * undef
- store <8 x half> undef, <8 x half> * undef
+ store <2 x double> undef, ptr undef
+ store <4 x float> undef, ptr undef
+ store <8 x half> undef, ptr undef
; We scalarize the loads/stores because there is no vector register name for
; these types (they get extended to v.4h/v.2s).
- store <2 x i8> undef, <2 x i8> * undef
- store <4 x i8> undef, <4 x i8> * undef
- load <2 x i8> , <2 x i8> * undef
- load <4 x i8> , <4 x i8> * undef
+ store <2 x i8> undef, ptr undef
+ store <4 x i8> undef, ptr undef
+ load <2 x i8> , ptr undef
+ load <4 x i8> , ptr undef
ret void
}
; Cost-model expectations for three masked gathers of scalable vectors, using
; attribute group #0 (defined outside this part of the file).
; Expected costs: the default prefix and the -VSCALE-2 prefix agree (80 for
; nxv4i32, 160 for nxv8i32); the -VSCALE-1 prefix expects half of that (40, 80).
; The <vscale x 1 x i64> gather is expected to report an invalid cost under all
; three prefixes.
; The call sites name the intrinsic without a pointer-type suffix, whereas the
; expected output carries one.
; NOTE(review): %v4i1mask and %v1i1mask are not referenced by the body.
define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) #0 {
; CHECK-LABEL: 'masked_gathers'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-2-LABEL: 'masked_gathers'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
-; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
+; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-1-LABEL: 'masked_gathers'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
-; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
+; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
- %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
- %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64(<vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
+ %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
+ %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
+ %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
ret void
}
; The same three masked gathers as @masked_gathers, but under attribute group
; #1 ("+sve", vscale_range(1, 16), "tune-cpu"="generic").
; All three check prefixes expect identical costs here: 80 for nxv4i32, 160 for
; nxv8i32, and an invalid cost for the <vscale x 1 x i64> gather.
; NOTE(review): %v4i1mask and %v1i1mask are not referenced by the body.
define void @masked_gathers_tune_generic(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) #1 {
; CHECK-LABEL: 'masked_gathers_tune_generic'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-2-LABEL: 'masked_gathers_tune_generic'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
-; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
+; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-1-LABEL: 'masked_gathers_tune_generic'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
-; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
+; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
- %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
- %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64(<vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
+ %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask, <vscale x 4 x i32> zeroinitializer)
+ %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask, <vscale x 8 x i32> zeroinitializer)
+ %res.nxv1i64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64(<vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask, <vscale x 1 x i64> zeroinitializer)
ret void
}
; Masked gathers under attribute group #2 ("+sve" only, no vscale_range).
; The function takes no arguments: pointers, masks and passthru values are all
; undef, and the i32 operand of every call is 1.
; Expected costs under the default and -VSCALE-2 prefixes:
;   nxv4f64 80, nxv2f64 40, nxv8f32 160, nxv4f32 80, nxv2f32 40,
;   nxv16i16 320, nxv8i16 160, nxv4i16 80.
; The -VSCALE-1 prefix expects exactly half of each value
; (40, 20, 80, 40, 20, 160, 80, 40).
define void @masked_gathers_no_vscale_range() #2 {
; CHECK-LABEL: 'masked_gathers_no_vscale_range'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8f32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %res.nxv16i16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0i16(<vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8f32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %res.nxv16i16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-2-LABEL: 'masked_gathers_no_vscale_range'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8f32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %res.nxv16i16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0i16(<vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8f32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4f32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv2f32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %res.nxv16i16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-1-LABEL: 'masked_gathers_no_vscale_range'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4f64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res.nxv2f64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv8f32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4f32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res.nxv2f32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv16i16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0i16(<vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4f64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res.nxv2f64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv8f32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4f32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res.nxv2f32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res.nxv16i16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %res.nxv4f64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64(<vscale x 4 x double*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
- %res.nxv2f64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+ %res.nxv4f64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+ %res.nxv2f64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
- %res.nxv8f32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32(<vscale x 8 x float*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
- %res.nxv4f32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x float*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
- %res.nxv2f32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+ %res.nxv8f32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+ %res.nxv4f32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+ %res.nxv2f32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
- %res.nxv16i16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16(<vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
- %res.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16(<vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
- %res.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+ %res.nxv16i16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+ %res.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+ %res.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
ret void
}
; Attribute groups used by the masked-gather tests:
;   #1 enables SVE, constrains vscale to the range 1..16 and tunes for "generic";
;   #2 enables SVE only, with no vscale_range.
; Group #0 is referenced by @masked_gathers but is not defined in this part of
; the file.
attributes #1 = { "target-features"="+sve" vscale_range(1, 16) "tune-cpu"="generic" }
attributes #2 = { "target-features"="+sve" }
; Declarations of the masked-gather intrinsics called by the tests. Each takes
; a scalable vector of pointers, an i32, a scalable i1 mask and a passthru
; vector of the result type.
-declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*>, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)
-declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64(<vscale x 1 x i64*>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
-declare <vscale x 4 x double> @llvm.masked.gather.nxv4f64(<vscale x 4 x double*>, i32, <vscale x 4 x i1>, <vscale x 4 x double>)
-declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
-declare <vscale x 8 x float> @llvm.masked.gather.nxv8f32(<vscale x 8 x float*>, i32, <vscale x 8 x i1>, <vscale x 8 x float>)
-declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x float*>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
-declare <vscale x 16 x i16> @llvm.masked.gather.nxv16i16(<vscale x 16 x i16*>, i32, <vscale x 16 x i1>, <vscale x 16 x i16>)
-declare <vscale x 8 x i16> @llvm.masked.gather.nxv8i16(<vscale x 8 x i16*>, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x i16*>, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
+declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)
+declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
+declare <vscale x 4 x double> @llvm.masked.gather.nxv4f64(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x double>)
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare <vscale x 8 x float> @llvm.masked.gather.nxv8f32(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x float>)
+declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
+declare <vscale x 16 x i16> @llvm.masked.gather.nxv16i16(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x i16>)
+declare <vscale x 8 x i16> @llvm.masked.gather.nxv8i16(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; Plain loads of scalable vectors with 128-bit elements (i128 and fp128, at
; <vscale x 1> and <vscale x 2>) from undef, followed by a store of the first
; loaded value to %ptrs. Every load and the store are expected to report an
; invalid cost. No expectation is written for the trailing `ret void`.
-define void @load_store(<vscale x 1 x i128>* %ptrs) {
+define void @load_store(ptr %ptrs) {
; CHECK-LABEL: 'load_store'
-; CHECK-NEXT: Invalid cost for instruction: %load1 = load <vscale x 1 x i128>, <vscale x 1 x i128>* undef
-; CHECK-NEXT: Invalid cost for instruction: %load2 = load <vscale x 2 x i128>, <vscale x 2 x i128>* undef
-; CHECK-NEXT: Invalid cost for instruction: %load3 = load <vscale x 1 x fp128>, <vscale x 1 x fp128>* undef
-; CHECK-NEXT: Invalid cost for instruction: %load4 = load <vscale x 2 x fp128>, <vscale x 2 x fp128>* undef
-; CHECK-NEXT: Invalid cost for instruction: store <vscale x 1 x i128> %load1, <vscale x 1 x i128>* %ptrs
- %load1 = load <vscale x 1 x i128>, <vscale x 1 x i128>* undef
- %load2 = load <vscale x 2 x i128>, <vscale x 2 x i128>* undef
- %load3 = load <vscale x 1 x fp128>, <vscale x 1 x fp128>* undef
- %load4 = load <vscale x 2 x fp128>, <vscale x 2 x fp128>* undef
- store <vscale x 1 x i128> %load1, <vscale x 1 x i128>* %ptrs
+; CHECK-NEXT: Invalid cost for instruction: %load1 = load <vscale x 1 x i128>, ptr undef
+; CHECK-NEXT: Invalid cost for instruction: %load2 = load <vscale x 2 x i128>, ptr undef
+; CHECK-NEXT: Invalid cost for instruction: %load3 = load <vscale x 1 x fp128>, ptr undef
+; CHECK-NEXT: Invalid cost for instruction: %load4 = load <vscale x 2 x fp128>, ptr undef
+; CHECK-NEXT: Invalid cost for instruction: store <vscale x 1 x i128> %load1, ptr %ptrs
+ %load1 = load <vscale x 1 x i128>, ptr undef
+ %load2 = load <vscale x 2 x i128>, ptr undef
+ %load3 = load <vscale x 1 x fp128>, ptr undef
+ %load4 = load <vscale x 2 x fp128>, ptr undef
+ store <vscale x 1 x i128> %load1, ptr %ptrs
ret void
}
; Masked load of <vscale x 1 x i128> from %val (with %passthru), followed by a
; masked store of the loaded value to %ptrs. Both calls use the same %mask and
; an i32 operand of 8, and both are expected to report an invalid cost.
; The call sites name the intrinsics without a pointer-type suffix, whereas the
; expected output carries one.
-define void @masked_load_store(<vscale x 1 x i128>* %ptrs, <vscale x 1 x i128>* %val, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) {
+define void @masked_load_store(ptr %ptrs, ptr %val, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) {
; CHECK-LABEL: 'masked_load_store'
-; CHECK-NEXT: Invalid cost for instruction: %mload = call <vscale x 1 x i128> @llvm.masked.load.nxv1i128.p0nxv1i128(<vscale x 1 x i128>* %val, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
-; CHECK-NEXT: Invalid cost for instruction: call void @llvm.masked.store.nxv1i128.p0nxv1i128(<vscale x 1 x i128> %mload, <vscale x 1 x i128>* %ptrs, i32 8, <vscale x 1 x i1> %mask)
- %mload = call <vscale x 1 x i128> @llvm.masked.load.nxv1i128(<vscale x 1 x i128>* %val, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
- call void @llvm.masked.store.nxv1i128(<vscale x 1 x i128> %mload, <vscale x 1 x i128>* %ptrs, i32 8, <vscale x 1 x i1> %mask)
+; CHECK-NEXT: Invalid cost for instruction: %mload = call <vscale x 1 x i128> @llvm.masked.load.nxv1i128.p0(ptr %val, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
+; CHECK-NEXT: Invalid cost for instruction: call void @llvm.masked.store.nxv1i128.p0(<vscale x 1 x i128> %mload, ptr %ptrs, i32 8, <vscale x 1 x i1> %mask)
+ %mload = call <vscale x 1 x i128> @llvm.masked.load.nxv1i128(ptr %val, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
+ call void @llvm.masked.store.nxv1i128(<vscale x 1 x i128> %mload, ptr %ptrs, i32 8, <vscale x 1 x i1> %mask)
ret void
}
-define void @masked_gather_scatter(<vscale x 1 x i128*> %ptrs, <vscale x 1 x i128*> %val, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) {
+define void @masked_gather_scatter(<vscale x 1 x ptr> %ptrs, <vscale x 1 x ptr> %val, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru) {
; CHECK-LABEL: 'masked_gather_scatter'
-; CHECK-NEXT: Invalid cost for instruction: %mgather = call <vscale x 1 x i128> @llvm.masked.gather.nxv1i128.nxv1p0i128(<vscale x 1 x i128*> %val, i32 0, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
-; CHECK-NEXT: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i128.nxv1p0i128(<vscale x 1 x i128> %mgather, <vscale x 1 x i128*> %ptrs, i32 0, <vscale x 1 x i1> %mask)
- %mgather = call <vscale x 1 x i128> @llvm.masked.gather.nxv1i128(<vscale x 1 x i128*> %val, i32 0, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
- call void @llvm.masked.scatter.nxv1i128(<vscale x 1 x i128> %mgather, <vscale x 1 x i128*> %ptrs, i32 0, <vscale x 1 x i1> %mask)
+; CHECK-NEXT: Invalid cost for instruction: %mgather = call <vscale x 1 x i128> @llvm.masked.gather.nxv1i128.nxv1p0(<vscale x 1 x ptr> %val, i32 0, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
+; CHECK-NEXT: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i128.nxv1p0(<vscale x 1 x i128> %mgather, <vscale x 1 x ptr> %ptrs, i32 0, <vscale x 1 x i1> %mask)
+ %mgather = call <vscale x 1 x i128> @llvm.masked.gather.nxv1i128(<vscale x 1 x ptr> %val, i32 0, <vscale x 1 x i1> %mask, <vscale x 1 x i128> %passthru)
+ call void @llvm.masked.scatter.nxv1i128(<vscale x 1 x i128> %mgather, <vscale x 1 x ptr> %ptrs, i32 0, <vscale x 1 x i1> %mask)
ret void
}
-declare <vscale x 1 x i128> @llvm.masked.load.nxv1i128(<vscale x 1 x i128>*, i32, <vscale x 1 x i1>, <vscale x 1 x i128>)
-declare <vscale x 1 x i128> @llvm.masked.gather.nxv1i128(<vscale x 1 x i128*>, i32, <vscale x 1 x i1>, <vscale x 1 x i128>)
+declare <vscale x 1 x i128> @llvm.masked.load.nxv1i128(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i128>)
+declare <vscale x 1 x i128> @llvm.masked.gather.nxv1i128(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i128>)
-declare void @llvm.masked.store.nxv1i128(<vscale x 1 x i128>, <vscale x 1 x i128>*, i32, <vscale x 1 x i1>)
-declare void @llvm.masked.scatter.nxv1i128(<vscale x 1 x i128>, <vscale x 1 x i128*>, i32, <vscale x 1 x i1>)
+declare void @llvm.masked.store.nxv1i128(<vscale x 1 x i128>, ptr, i32, <vscale x 1 x i1>)
+declare void @llvm.masked.scatter.nxv1i128(<vscale x 1 x i128>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
ret void
}
-define <vscale x 4 x i32> @masked_gather_nxv4i32(<vscale x 4 x i32*> %ld, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru) {
+define <vscale x 4 x i32> @masked_gather_nxv4i32(<vscale x 4 x ptr> %ld, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru) {
; CHECK-LABEL: 'masked_gather_nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %ld, i32 0, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ld, i32 0, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 4 x i32> %res
;
; TYPE_BASED_ONLY-LABEL: 'masked_gather_nxv4i32'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %ld, i32 0, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ld, i32 0, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 4 x i32> %res
;
- %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ld, i32 0, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
+ %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x ptr> %ld, i32 0, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
ret <vscale x 4 x i32> %res
}
-define <vscale x 8 x i32> @masked_gather_nxv8i32(<vscale x 8 x i32*> %ld, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru) {
+define <vscale x 8 x i32> @masked_gather_nxv8i32(<vscale x 8 x ptr> %ld, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru) {
; CHECK-LABEL: 'masked_gather_nxv8i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ld, i32 0, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %res = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ld, i32 0, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 8 x i32> %res
;
; TYPE_BASED_ONLY-LABEL: 'masked_gather_nxv8i32'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %res = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ld, i32 0, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %res = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ld, i32 0, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 8 x i32> %res
;
- %res = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*> %ld, i32 0, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru)
+ %res = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x ptr> %ld, i32 0, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru)
ret <vscale x 8 x i32> %res
}
-define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ld, <4 x i1> %masks, <4 x i32> %passthru) {
+define <4 x i32> @masked_gather_v4i32(<4 x ptr> %ld, <4 x i1> %masks, <4 x i32> %passthru) {
; CHECK-LABEL: 'masked_gather_v4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; TYPE_BASED_ONLY-LABEL: 'masked_gather_v4i32'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
- %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
+ %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
ret <4 x i32> %res
}
-define <1 x i128> @masked_gather_v1i128(<1 x i128*> %ld, <1 x i1> %masks, <1 x i128> %passthru) {
+define <1 x i128> @masked_gather_v1i128(<1 x ptr> %ld, <1 x i1> %masks, <1 x i128> %passthru) {
; CHECK-LABEL: 'masked_gather_v1i128'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <1 x i128> %res
;
; TYPE_BASED_ONLY-LABEL: 'masked_gather_v1i128'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <1 x i128> %res
;
- %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
+ %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
ret <1 x i128> %res
}
-define void @masked_scatter_nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %masks) {
+define void @masked_scatter_nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %masks) {
; CHECK-LABEL: 'masked_scatter_nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %data, <vscale x 4 x ptr> %ptrs, i32 0, <vscale x 4 x i1> %masks)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'masked_scatter_nxv4i32'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %data, <vscale x 4 x ptr> %ptrs, i32 0, <vscale x 4 x i1> %masks)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
+ call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x ptr> %ptrs, i32 0, <vscale x 4 x i1> %masks)
ret void
}
-define void @masked_scatter_nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %masks) {
+define void @masked_scatter_nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %masks) {
; CHECK-LABEL: 'masked_scatter_nxv8i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %data, <vscale x 8 x i32*> %ptrs, i32 0, <vscale x 8 x i1> %masks)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %data, <vscale x 8 x ptr> %ptrs, i32 0, <vscale x 8 x i1> %masks)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'masked_scatter_nxv8i32'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %data, <vscale x 8 x i32*> %ptrs, i32 0, <vscale x 8 x i1> %masks)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %data, <vscale x 8 x ptr> %ptrs, i32 0, <vscale x 8 x i1> %masks)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x i32*> %ptrs, i32 0, <vscale x 8 x i1> %masks)
+ call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x ptr> %ptrs, i32 0, <vscale x 8 x i1> %masks)
ret void
}
-define void @masked_scatter_v4i32(<4 x i32> %data, <4 x i32*> %ptrs, <4 x i1> %masks) {
+define void @masked_scatter_v4i32(<4 x i32> %data, <4 x ptr> %ptrs, <4 x i1> %masks) {
; CHECK-LABEL: 'masked_scatter_v4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 0, <4 x i1> %masks)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'masked_scatter_v4i32'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 0, <4 x i1> %masks)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- call void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 0, <4 x i1> %masks)
+ call void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
ret void
}
-define void @masked_scatter_v1i128(<1 x i128> %data, <1 x i128*> %ptrs, <1 x i1> %masks) {
+define void @masked_scatter_v1i128(<1 x i128> %data, <1 x ptr> %ptrs, <1 x i1> %masks) {
; CHECK-LABEL: 'masked_scatter_v1i128'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128> %data, <1 x i128*> %ptrs, i32 0, <1 x i1> %masks)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 0, <1 x i1> %masks)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'masked_scatter_v1i128'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128> %data, <1 x i128*> %ptrs, i32 0, <1 x i1> %masks)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 0, <1 x i1> %masks)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- call void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128> %data, <1 x i128*> %ptrs, i32 0, <1 x i1> %masks)
+ call void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 0, <1 x i1> %masks)
ret void
}
declare <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x i32*> %ptrs, i32 %align, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
-declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x i32*> %ptrs, i32 %align, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru)
-declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthru)
-declare <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*>, i32, <1 x i1>, <1 x i128>)
-declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 %align, <vscale x 4 x i1> %masks)
-declare void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x i32*> %ptrs, i32 %align, <vscale x 8 x i1> %masks)
-declare void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x i32*> %ptrs, i32 %align, <4 x i1> %masks)
-declare void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128> %data, <1 x i128*> %ptrs, i32 %align, <1 x i1> %masks)
+declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x ptr> %ptrs, i32 %align, <vscale x 4 x i1> %masks, <vscale x 4 x i32> %passthru)
+declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32(<vscale x 8 x ptr> %ptrs, i32 %align, <vscale x 8 x i1> %masks, <vscale x 8 x i32> %passthru)
+declare <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthru)
+declare <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i128>)
+declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x ptr> %ptrs, i32 %align, <vscale x 4 x i1> %masks)
+declare void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x ptr> %ptrs, i32 %align, <vscale x 8 x i1> %masks)
+declare void @llvm.masked.scatter.v4i32(<4 x i32> %data, <4 x ptr> %ptrs, i32 %align, <4 x i1> %masks)
+declare void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 %align, <1 x i1> %masks)
attributes #0 = { "target-features"="+sve,+bf16" }
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.nxv16i8
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res.nxv32i8
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.nxv1i64
- %res.nxv8i8 = load <vscale x 8 x i8>, <vscale x 8 x i8>* undef
- %res.nxv16i8 = load <vscale x 16 x i8>, <vscale x 16 x i8>* undef
- %res.nxv32i8 = load <vscale x 32 x i8>, <vscale x 32 x i8>* undef
- %res.nxv1i64 = load <vscale x 1 x i64>, <vscale x 1 x i64>* undef
+ %res.nxv8i8 = load <vscale x 8 x i8>, ptr undef
+ %res.nxv16i8 = load <vscale x 16 x i8>, ptr undef
+ %res.nxv32i8 = load <vscale x 32 x i8>, ptr undef
+ %res.nxv1i64 = load <vscale x 1 x i64>, ptr undef
ret void
}
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <vscale x 16 x i8>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <vscale x 32 x i8>
; CHECK-NEXT: Cost Model: Invalid cost for instruction: store <vscale x 1 x i64>
- store <vscale x 8 x i8> undef, <vscale x 8 x i8>* undef
- store <vscale x 16 x i8> undef, <vscale x 16 x i8>* undef
- store <vscale x 32 x i8> undef, <vscale x 32 x i8>* undef
- store <vscale x 1 x i64> undef, <vscale x 1 x i64>* undef
+ store <vscale x 8 x i8> undef, ptr undef
+ store <vscale x 16 x i8> undef, ptr undef
+ store <vscale x 32 x i8> undef, ptr undef
+ store <vscale x 1 x i64> undef, ptr undef
ret void
}
define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) #0 {
; CHECK-LABEL: 'masked_scatters'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-2-LABEL: 'masked_scatters'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
-; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
+; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-1-LABEL: 'masked_scatters'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
-; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
+; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
- call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
- call void @llvm.masked.scatter.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
+ call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
+ call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
+ call void @llvm.masked.scatter.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
ret void
}
define void @masked_scatters_tune_generic(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) #1 {
; CHECK-LABEL: 'masked_scatters_tune_generic'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-2-LABEL: 'masked_scatters_tune_generic'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
-; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
+; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-1-LABEL: 'masked_scatters_tune_generic'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
-; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
+; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
- call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
- call void @llvm.masked.scatter.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
+ call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
+ call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
+ call void @llvm.masked.scatter.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
ret void
}
; Cost-model coverage for llvm.masked.scatter on scalable vectors in a function
; that uses attribute group #2, which enables +sve but carries no vscale_range.
; Every call passes undef for the stored value, the pointer vector and the mask,
; together with an `i32 1` alignment argument.
; The CHECK and CHECK-VSCALE-2 expectations are identical; each CHECK-VSCALE-1
; expectation is exactly half of the corresponding CHECK value.
define void @masked_scatters_no_vscale_range() #2 {
; CHECK-LABEL: 'masked_scatters_no_vscale_range'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 1, <vscale x 4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 1, <vscale x 2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 1, <vscale x 8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 1, <vscale x 4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 1, <vscale x 2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-2-LABEL: 'masked_scatters_no_vscale_range'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 1, <vscale x 4 x i1> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 1, <vscale x 2 x i1> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 1, <vscale x 8 x i1> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 1, <vscale x 4 x i1> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 1, <vscale x 2 x i1> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-VSCALE-1-LABEL: 'masked_scatters_no_vscale_range'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 1, <vscale x 4 x i1> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 1, <vscale x 2 x i1> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 1, <vscale x 8 x i1> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 1, <vscale x 4 x i1> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 1, <vscale x 2 x i1> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- call void @llvm.masked.scatter.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 1, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 1, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 1, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 1, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 1, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef)
- call void @llvm.masked.scatter.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+ call void @llvm.masked.scatter.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
ret void
}
attributes #1 = { "target-features"="+sve" vscale_range(1, 16) "tune-cpu"="generic" }
attributes #2 = { "target-features"="+sve" }
-declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64*>, i32, <vscale x 1 x i1>)
-declare void @llvm.masked.scatter.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double*>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float*>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float*>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16*>, i32, <vscale x 16 x i1>)
-declare void @llvm.masked.scatter.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16*>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.scatter.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
+declare void @llvm.masked.scatter.nxv4f64(<vscale x 4 x double>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv8f32(<vscale x 8 x float>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)
+declare void @llvm.masked.scatter.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
; Cost-model coverage for addrspacecast from addrspace(1) to the default address
; space (named "global" and "flat" by this function), for a scalar pointer and
; for 2-, 3-, 4- and 32-element pointer vectors.
; Both the ALL and ALL-SIZE prefixes expect every cast to cost 0; they differ
; only in the expected cost of `ret void` (10 vs. 1).
; NOTE(review): the i8/i16/i32/i64 parts of the value names come from the
; typed-pointer form; in the opaque-pointer form the four groups are the same
; five casts repeated -- confirm the repetition is intentional.
define void @addrspacecast_global_to_flat() #0 {
; ALL-LABEL: 'addrspacecast_global_to_flat'
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8 addrspace(1)* undef to i8*
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(1)*> undef to <2 x i8*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x i8 addrspace(1)*> undef to <3 x i8*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x i8 addrspace(1)*> undef to <4 x i8*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(1)*> undef to <32 x i8*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast i16 addrspace(1)* undef to i16*
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x i16 addrspace(1)*> undef to <2 x i16*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x i16 addrspace(1)*> undef to <3 x i16*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x i16 addrspace(1)*> undef to <4 x i16*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x i16 addrspace(1)*> undef to <32 x i16*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast i32 addrspace(1)* undef to i32*
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x i32 addrspace(1)*> undef to <2 x i32*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x i32 addrspace(1)*> undef to <3 x i32*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x i32 addrspace(1)*> undef to <4 x i32*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x i32 addrspace(1)*> undef to <32 x i32*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast i64 addrspace(1)* undef to i64*
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x i64 addrspace(1)*> undef to <2 x i64*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x i64 addrspace(1)*> undef to <3 x i64*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x i64 addrspace(1)*> undef to <4 x i64*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x i64 addrspace(1)*> undef to <32 x i64*>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast ptr addrspace(1) undef to ptr
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x ptr addrspace(1)> undef to <2 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x ptr addrspace(1)> undef to <3 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x ptr addrspace(1)> undef to <4 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x ptr addrspace(1)> undef to <32 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast ptr addrspace(1) undef to ptr
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x ptr addrspace(1)> undef to <2 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x ptr addrspace(1)> undef to <3 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x ptr addrspace(1)> undef to <4 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x ptr addrspace(1)> undef to <32 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast ptr addrspace(1) undef to ptr
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x ptr addrspace(1)> undef to <2 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x ptr addrspace(1)> undef to <3 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x ptr addrspace(1)> undef to <4 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x ptr addrspace(1)> undef to <32 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast ptr addrspace(1) undef to ptr
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x ptr addrspace(1)> undef to <2 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x ptr addrspace(1)> undef to <3 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x ptr addrspace(1)> undef to <4 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x ptr addrspace(1)> undef to <32 x ptr>
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'addrspacecast_global_to_flat'
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8 addrspace(1)* undef to i8*
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(1)*> undef to <2 x i8*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x i8 addrspace(1)*> undef to <3 x i8*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x i8 addrspace(1)*> undef to <4 x i8*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(1)*> undef to <32 x i8*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast i16 addrspace(1)* undef to i16*
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x i16 addrspace(1)*> undef to <2 x i16*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x i16 addrspace(1)*> undef to <3 x i16*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x i16 addrspace(1)*> undef to <4 x i16*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x i16 addrspace(1)*> undef to <32 x i16*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast i32 addrspace(1)* undef to i32*
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x i32 addrspace(1)*> undef to <2 x i32*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x i32 addrspace(1)*> undef to <3 x i32*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x i32 addrspace(1)*> undef to <4 x i32*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x i32 addrspace(1)*> undef to <32 x i32*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast i64 addrspace(1)* undef to i64*
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x i64 addrspace(1)*> undef to <2 x i64*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x i64 addrspace(1)*> undef to <3 x i64*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x i64 addrspace(1)*> undef to <4 x i64*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x i64 addrspace(1)*> undef to <32 x i64*>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast ptr addrspace(1) undef to ptr
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x ptr addrspace(1)> undef to <2 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x ptr addrspace(1)> undef to <3 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x ptr addrspace(1)> undef to <4 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x ptr addrspace(1)> undef to <32 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast ptr addrspace(1) undef to ptr
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x ptr addrspace(1)> undef to <2 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x ptr addrspace(1)> undef to <3 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x ptr addrspace(1)> undef to <4 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x ptr addrspace(1)> undef to <32 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast ptr addrspace(1) undef to ptr
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x ptr addrspace(1)> undef to <2 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x ptr addrspace(1)> undef to <3 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x ptr addrspace(1)> undef to <4 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x ptr addrspace(1)> undef to <32 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast ptr addrspace(1) undef to ptr
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x ptr addrspace(1)> undef to <2 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x ptr addrspace(1)> undef to <3 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x ptr addrspace(1)> undef to <4 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x ptr addrspace(1)> undef to <32 x ptr>
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %i8ptr = addrspacecast i8 addrspace(1)* undef to i8*
- %v2i8ptr = addrspacecast <2 x i8 addrspace(1)*> undef to <2 x i8*>
- %v3i8ptr = addrspacecast <3 x i8 addrspace(1)*> undef to <3 x i8*>
- %v4i8ptr = addrspacecast <4 x i8 addrspace(1)*> undef to <4 x i8*>
- %v32i8ptr = addrspacecast <32 x i8 addrspace(1)*> undef to <32 x i8*>
- %i16ptr = addrspacecast i16 addrspace(1)* undef to i16*
- %v2i16ptr = addrspacecast <2 x i16 addrspace(1)*> undef to <2 x i16*>
- %v3i16ptr = addrspacecast <3 x i16 addrspace(1)*> undef to <3 x i16*>
- %v4i16ptr = addrspacecast <4 x i16 addrspace(1)*> undef to <4 x i16*>
- %v32i16ptr = addrspacecast <32 x i16 addrspace(1)*> undef to <32 x i16*>
- %i32ptr = addrspacecast i32 addrspace(1)* undef to i32*
- %v2i32ptr = addrspacecast <2 x i32 addrspace(1)*> undef to <2 x i32*>
- %v3i32ptr = addrspacecast <3 x i32 addrspace(1)*> undef to <3 x i32*>
- %v4i32ptr = addrspacecast <4 x i32 addrspace(1)*> undef to <4 x i32*>
- %v32i32ptr = addrspacecast <32 x i32 addrspace(1)*> undef to <32 x i32*>
- %i64ptr = addrspacecast i64 addrspace(1)* undef to i64*
- %v2i64ptr = addrspacecast <2 x i64 addrspace(1)*> undef to <2 x i64*>
- %v3i64ptr = addrspacecast <3 x i64 addrspace(1)*> undef to <3 x i64*>
- %v4i64ptr = addrspacecast <4 x i64 addrspace(1)*> undef to <4 x i64*>
- %v32i64ptr = addrspacecast <32 x i64 addrspace(1)*> undef to <32 x i64*>
+ %i8ptr = addrspacecast ptr addrspace(1) undef to ptr
+ %v2i8ptr = addrspacecast <2 x ptr addrspace(1)> undef to <2 x ptr>
+ %v3i8ptr = addrspacecast <3 x ptr addrspace(1)> undef to <3 x ptr>
+ %v4i8ptr = addrspacecast <4 x ptr addrspace(1)> undef to <4 x ptr>
+ %v32i8ptr = addrspacecast <32 x ptr addrspace(1)> undef to <32 x ptr>
+ %i16ptr = addrspacecast ptr addrspace(1) undef to ptr
+ %v2i16ptr = addrspacecast <2 x ptr addrspace(1)> undef to <2 x ptr>
+ %v3i16ptr = addrspacecast <3 x ptr addrspace(1)> undef to <3 x ptr>
+ %v4i16ptr = addrspacecast <4 x ptr addrspace(1)> undef to <4 x ptr>
+ %v32i16ptr = addrspacecast <32 x ptr addrspace(1)> undef to <32 x ptr>
+ %i32ptr = addrspacecast ptr addrspace(1) undef to ptr
+ %v2i32ptr = addrspacecast <2 x ptr addrspace(1)> undef to <2 x ptr>
+ %v3i32ptr = addrspacecast <3 x ptr addrspace(1)> undef to <3 x ptr>
+ %v4i32ptr = addrspacecast <4 x ptr addrspace(1)> undef to <4 x ptr>
+ %v32i32ptr = addrspacecast <32 x ptr addrspace(1)> undef to <32 x ptr>
+ %i64ptr = addrspacecast ptr addrspace(1) undef to ptr
+ %v2i64ptr = addrspacecast <2 x ptr addrspace(1)> undef to <2 x ptr>
+ %v3i64ptr = addrspacecast <3 x ptr addrspace(1)> undef to <3 x ptr>
+ %v4i64ptr = addrspacecast <4 x ptr addrspace(1)> undef to <4 x ptr>
+ %v32i64ptr = addrspacecast <32 x ptr addrspace(1)> undef to <32 x ptr>
ret void
}
define void @addrspacecast_local_to_flat() #0 {
; ALL-LABEL: 'addrspacecast_local_to_flat'
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8ptr = addrspacecast i8 addrspace(3)* undef to i8*
-; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(3)*> undef to <2 x i8*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i8ptr = addrspacecast <3 x i8 addrspace(3)*> undef to <3 x i8*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8ptr = addrspacecast <4 x i8 addrspace(3)*> undef to <4 x i8*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(3)*> undef to <32 x i8*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16ptr = addrspacecast i16 addrspace(3)* undef to i16*
-; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16ptr = addrspacecast <2 x i16 addrspace(3)*> undef to <2 x i16*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i16ptr = addrspacecast <3 x i16 addrspace(3)*> undef to <3 x i16*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16ptr = addrspacecast <4 x i16 addrspace(3)*> undef to <4 x i16*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16ptr = addrspacecast <32 x i16 addrspace(3)*> undef to <32 x i16*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32ptr = addrspacecast i32 addrspace(3)* undef to i32*
-; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32ptr = addrspacecast <2 x i32 addrspace(3)*> undef to <2 x i32*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32ptr = addrspacecast <3 x i32 addrspace(3)*> undef to <3 x i32*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32ptr = addrspacecast <4 x i32 addrspace(3)*> undef to <4 x i32*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i32ptr = addrspacecast <32 x i32 addrspace(3)*> undef to <32 x i32*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i64ptr = addrspacecast i64 addrspace(3)* undef to i64*
-; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64ptr = addrspacecast <2 x i64 addrspace(3)*> undef to <2 x i64*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i64ptr = addrspacecast <3 x i64 addrspace(3)*> undef to <3 x i64*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i64ptr = addrspacecast <4 x i64 addrspace(3)*> undef to <4 x i64*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i64ptr = addrspacecast <32 x i64 addrspace(3)*> undef to <32 x i64*>
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8ptr = addrspacecast ptr addrspace(3) undef to ptr
+; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8ptr = addrspacecast <2 x ptr addrspace(3)> undef to <2 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i8ptr = addrspacecast <3 x ptr addrspace(3)> undef to <3 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8ptr = addrspacecast <4 x ptr addrspace(3)> undef to <4 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8ptr = addrspacecast <32 x ptr addrspace(3)> undef to <32 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16ptr = addrspacecast ptr addrspace(3) undef to ptr
+; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16ptr = addrspacecast <2 x ptr addrspace(3)> undef to <2 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i16ptr = addrspacecast <3 x ptr addrspace(3)> undef to <3 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16ptr = addrspacecast <4 x ptr addrspace(3)> undef to <4 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16ptr = addrspacecast <32 x ptr addrspace(3)> undef to <32 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32ptr = addrspacecast ptr addrspace(3) undef to ptr
+; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32ptr = addrspacecast <2 x ptr addrspace(3)> undef to <2 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32ptr = addrspacecast <3 x ptr addrspace(3)> undef to <3 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32ptr = addrspacecast <4 x ptr addrspace(3)> undef to <4 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i32ptr = addrspacecast <32 x ptr addrspace(3)> undef to <32 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i64ptr = addrspacecast ptr addrspace(3) undef to ptr
+; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64ptr = addrspacecast <2 x ptr addrspace(3)> undef to <2 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i64ptr = addrspacecast <3 x ptr addrspace(3)> undef to <3 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i64ptr = addrspacecast <4 x ptr addrspace(3)> undef to <4 x ptr>
+; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i64ptr = addrspacecast <32 x ptr addrspace(3)> undef to <32 x ptr>
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'addrspacecast_local_to_flat'
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8ptr = addrspacecast i8 addrspace(3)* undef to i8*
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(3)*> undef to <2 x i8*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i8ptr = addrspacecast <3 x i8 addrspace(3)*> undef to <3 x i8*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8ptr = addrspacecast <4 x i8 addrspace(3)*> undef to <4 x i8*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(3)*> undef to <32 x i8*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16ptr = addrspacecast i16 addrspace(3)* undef to i16*
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16ptr = addrspacecast <2 x i16 addrspace(3)*> undef to <2 x i16*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i16ptr = addrspacecast <3 x i16 addrspace(3)*> undef to <3 x i16*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16ptr = addrspacecast <4 x i16 addrspace(3)*> undef to <4 x i16*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16ptr = addrspacecast <32 x i16 addrspace(3)*> undef to <32 x i16*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32ptr = addrspacecast i32 addrspace(3)* undef to i32*
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32ptr = addrspacecast <2 x i32 addrspace(3)*> undef to <2 x i32*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32ptr = addrspacecast <3 x i32 addrspace(3)*> undef to <3 x i32*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32ptr = addrspacecast <4 x i32 addrspace(3)*> undef to <4 x i32*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i32ptr = addrspacecast <32 x i32 addrspace(3)*> undef to <32 x i32*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i64ptr = addrspacecast i64 addrspace(3)* undef to i64*
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64ptr = addrspacecast <2 x i64 addrspace(3)*> undef to <2 x i64*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i64ptr = addrspacecast <3 x i64 addrspace(3)*> undef to <3 x i64*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i64ptr = addrspacecast <4 x i64 addrspace(3)*> undef to <4 x i64*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i64ptr = addrspacecast <32 x i64 addrspace(3)*> undef to <32 x i64*>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8ptr = addrspacecast ptr addrspace(3) undef to ptr
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8ptr = addrspacecast <2 x ptr addrspace(3)> undef to <2 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i8ptr = addrspacecast <3 x ptr addrspace(3)> undef to <3 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8ptr = addrspacecast <4 x ptr addrspace(3)> undef to <4 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8ptr = addrspacecast <32 x ptr addrspace(3)> undef to <32 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16ptr = addrspacecast ptr addrspace(3) undef to ptr
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16ptr = addrspacecast <2 x ptr addrspace(3)> undef to <2 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i16ptr = addrspacecast <3 x ptr addrspace(3)> undef to <3 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16ptr = addrspacecast <4 x ptr addrspace(3)> undef to <4 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i16ptr = addrspacecast <32 x ptr addrspace(3)> undef to <32 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32ptr = addrspacecast ptr addrspace(3) undef to ptr
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32ptr = addrspacecast <2 x ptr addrspace(3)> undef to <2 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i32ptr = addrspacecast <3 x ptr addrspace(3)> undef to <3 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32ptr = addrspacecast <4 x ptr addrspace(3)> undef to <4 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i32ptr = addrspacecast <32 x ptr addrspace(3)> undef to <32 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i64ptr = addrspacecast ptr addrspace(3) undef to ptr
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64ptr = addrspacecast <2 x ptr addrspace(3)> undef to <2 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3i64ptr = addrspacecast <3 x ptr addrspace(3)> undef to <3 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i64ptr = addrspacecast <4 x ptr addrspace(3)> undef to <4 x ptr>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i64ptr = addrspacecast <32 x ptr addrspace(3)> undef to <32 x ptr>
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %i8ptr = addrspacecast i8 addrspace(3)* undef to i8*
- %v2i8ptr = addrspacecast <2 x i8 addrspace(3)*> undef to <2 x i8*>
- %v3i8ptr = addrspacecast <3 x i8 addrspace(3)*> undef to <3 x i8*>
- %v4i8ptr = addrspacecast <4 x i8 addrspace(3)*> undef to <4 x i8*>
- %v32i8ptr = addrspacecast <32 x i8 addrspace(3)*> undef to <32 x i8*>
- %i16ptr = addrspacecast i16 addrspace(3)* undef to i16*
- %v2i16ptr = addrspacecast <2 x i16 addrspace(3)*> undef to <2 x i16*>
- %v3i16ptr = addrspacecast <3 x i16 addrspace(3)*> undef to <3 x i16*>
- %v4i16ptr = addrspacecast <4 x i16 addrspace(3)*> undef to <4 x i16*>
- %v32i16ptr = addrspacecast <32 x i16 addrspace(3)*> undef to <32 x i16*>
- %i32ptr = addrspacecast i32 addrspace(3)* undef to i32*
- %v2i32ptr = addrspacecast <2 x i32 addrspace(3)*> undef to <2 x i32*>
- %v3i32ptr = addrspacecast <3 x i32 addrspace(3)*> undef to <3 x i32*>
- %v4i32ptr = addrspacecast <4 x i32 addrspace(3)*> undef to <4 x i32*>
- %v32i32ptr = addrspacecast <32 x i32 addrspace(3)*> undef to <32 x i32*>
- %i64ptr = addrspacecast i64 addrspace(3)* undef to i64*
- %v2i64ptr = addrspacecast <2 x i64 addrspace(3)*> undef to <2 x i64*>
- %v3i64ptr = addrspacecast <3 x i64 addrspace(3)*> undef to <3 x i64*>
- %v4i64ptr = addrspacecast <4 x i64 addrspace(3)*> undef to <4 x i64*>
- %v32i64ptr = addrspacecast <32 x i64 addrspace(3)*> undef to <32 x i64*>
+ %i8ptr = addrspacecast ptr addrspace(3) undef to ptr
+ %v2i8ptr = addrspacecast <2 x ptr addrspace(3)> undef to <2 x ptr>
+ %v3i8ptr = addrspacecast <3 x ptr addrspace(3)> undef to <3 x ptr>
+ %v4i8ptr = addrspacecast <4 x ptr addrspace(3)> undef to <4 x ptr>
+ %v32i8ptr = addrspacecast <32 x ptr addrspace(3)> undef to <32 x ptr>
+ %i16ptr = addrspacecast ptr addrspace(3) undef to ptr
+ %v2i16ptr = addrspacecast <2 x ptr addrspace(3)> undef to <2 x ptr>
+ %v3i16ptr = addrspacecast <3 x ptr addrspace(3)> undef to <3 x ptr>
+ %v4i16ptr = addrspacecast <4 x ptr addrspace(3)> undef to <4 x ptr>
+ %v32i16ptr = addrspacecast <32 x ptr addrspace(3)> undef to <32 x ptr>
+ %i32ptr = addrspacecast ptr addrspace(3) undef to ptr
+ %v2i32ptr = addrspacecast <2 x ptr addrspace(3)> undef to <2 x ptr>
+ %v3i32ptr = addrspacecast <3 x ptr addrspace(3)> undef to <3 x ptr>
+ %v4i32ptr = addrspacecast <4 x ptr addrspace(3)> undef to <4 x ptr>
+ %v32i32ptr = addrspacecast <32 x ptr addrspace(3)> undef to <32 x ptr>
+ %i64ptr = addrspacecast ptr addrspace(3) undef to ptr
+ %v2i64ptr = addrspacecast <2 x ptr addrspace(3)> undef to <2 x ptr>
+ %v3i64ptr = addrspacecast <3 x ptr addrspace(3)> undef to <3 x ptr>
+ %v4i64ptr = addrspacecast <4 x ptr addrspace(3)> undef to <4 x ptr>
+ %v32i64ptr = addrspacecast <32 x ptr addrspace(3)> undef to <32 x ptr>
ret void
}
; Cost of addrspacecast from the default address space to addrspace(3), for a
; scalar pointer and for 2-, 3-, 4- and 32-element pointer vectors. Every cast
; is expected to cost 0 under both check prefixes. With opaque pointers the
; i8/i16/i32/i64 groups are the same instruction and differ only in value name.
define void @addrspacecast_flat_to_local() #0 {
; ALL-LABEL: 'addrspacecast_flat_to_local'
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8* undef to i8 addrspace(3)*
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8*> undef to <2 x i8 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x i8*> undef to <3 x i8 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x i8*> undef to <4 x i8 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8*> undef to <32 x i8 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast i16* undef to i16 addrspace(3)*
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x i16*> undef to <2 x i16 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x i16*> undef to <3 x i16 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x i16*> undef to <4 x i16 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x i16*> undef to <32 x i16 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast i32* undef to i32 addrspace(3)*
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x i32*> undef to <2 x i32 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x i32*> undef to <3 x i32 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x i32*> undef to <4 x i32 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x i32*> undef to <32 x i32 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast i64* undef to i64 addrspace(3)*
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x i64*> undef to <2 x i64 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x i64*> undef to <3 x i64 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x i64*> undef to <4 x i64 addrspace(3)*>
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x i64*> undef to <32 x i64 addrspace(3)*>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast ptr undef to ptr addrspace(3)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x ptr> undef to <2 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x ptr> undef to <3 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x ptr> undef to <4 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x ptr> undef to <32 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast ptr undef to ptr addrspace(3)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x ptr> undef to <2 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x ptr> undef to <3 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x ptr> undef to <4 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x ptr> undef to <32 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast ptr undef to ptr addrspace(3)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x ptr> undef to <2 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x ptr> undef to <3 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x ptr> undef to <4 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x ptr> undef to <32 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast ptr undef to ptr addrspace(3)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x ptr> undef to <2 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x ptr> undef to <3 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x ptr> undef to <4 x ptr addrspace(3)>
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x ptr> undef to <32 x ptr addrspace(3)>
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'addrspacecast_flat_to_local'
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8* undef to i8 addrspace(3)*
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8*> undef to <2 x i8 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x i8*> undef to <3 x i8 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x i8*> undef to <4 x i8 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8*> undef to <32 x i8 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast i16* undef to i16 addrspace(3)*
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x i16*> undef to <2 x i16 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x i16*> undef to <3 x i16 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x i16*> undef to <4 x i16 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x i16*> undef to <32 x i16 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast i32* undef to i32 addrspace(3)*
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x i32*> undef to <2 x i32 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x i32*> undef to <3 x i32 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x i32*> undef to <4 x i32 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x i32*> undef to <32 x i32 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast i64* undef to i64 addrspace(3)*
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x i64*> undef to <2 x i64 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x i64*> undef to <3 x i64 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x i64*> undef to <4 x i64 addrspace(3)*>
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x i64*> undef to <32 x i64 addrspace(3)*>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast ptr undef to ptr addrspace(3)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x ptr> undef to <2 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x ptr> undef to <3 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x ptr> undef to <4 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x ptr> undef to <32 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast ptr undef to ptr addrspace(3)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x ptr> undef to <2 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x ptr> undef to <3 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x ptr> undef to <4 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x ptr> undef to <32 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast ptr undef to ptr addrspace(3)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x ptr> undef to <2 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x ptr> undef to <3 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x ptr> undef to <4 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x ptr> undef to <32 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast ptr undef to ptr addrspace(3)
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x ptr> undef to <2 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x ptr> undef to <3 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x ptr> undef to <4 x ptr addrspace(3)>
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x ptr> undef to <32 x ptr addrspace(3)>
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %i8ptr = addrspacecast i8* undef to i8 addrspace(3)*
- %v2i8ptr = addrspacecast <2 x i8*> undef to <2 x i8 addrspace(3)*>
- %v3i8ptr = addrspacecast <3 x i8*> undef to <3 x i8 addrspace(3)*>
- %v4i8ptr = addrspacecast <4 x i8*> undef to <4 x i8 addrspace(3)*>
- %v32i8ptr = addrspacecast <32 x i8*> undef to <32 x i8 addrspace(3)*>
- %i16ptr = addrspacecast i16* undef to i16 addrspace(3)*
- %v2i16ptr = addrspacecast <2 x i16*> undef to <2 x i16 addrspace(3)*>
- %v3i16ptr = addrspacecast <3 x i16*> undef to <3 x i16 addrspace(3)*>
- %v4i16ptr = addrspacecast <4 x i16*> undef to <4 x i16 addrspace(3)*>
- %v32i16ptr = addrspacecast <32 x i16*> undef to <32 x i16 addrspace(3)*>
- %i32ptr = addrspacecast i32* undef to i32 addrspace(3)*
- %v2i32ptr = addrspacecast <2 x i32*> undef to <2 x i32 addrspace(3)*>
- %v3i32ptr = addrspacecast <3 x i32*> undef to <3 x i32 addrspace(3)*>
- %v4i32ptr = addrspacecast <4 x i32*> undef to <4 x i32 addrspace(3)*>
- %v32i32ptr = addrspacecast <32 x i32*> undef to <32 x i32 addrspace(3)*>
- %i64ptr = addrspacecast i64* undef to i64 addrspace(3)*
- %v2i64ptr = addrspacecast <2 x i64*> undef to <2 x i64 addrspace(3)*>
- %v3i64ptr = addrspacecast <3 x i64*> undef to <3 x i64 addrspace(3)*>
- %v4i64ptr = addrspacecast <4 x i64*> undef to <4 x i64 addrspace(3)*>
- %v32i64ptr = addrspacecast <32 x i64*> undef to <32 x i64 addrspace(3)*>
+ %i8ptr = addrspacecast ptr undef to ptr addrspace(3)
+ %v2i8ptr = addrspacecast <2 x ptr> undef to <2 x ptr addrspace(3)>
+ %v3i8ptr = addrspacecast <3 x ptr> undef to <3 x ptr addrspace(3)>
+ %v4i8ptr = addrspacecast <4 x ptr> undef to <4 x ptr addrspace(3)>
+ %v32i8ptr = addrspacecast <32 x ptr> undef to <32 x ptr addrspace(3)>
+ %i16ptr = addrspacecast ptr undef to ptr addrspace(3)
+ %v2i16ptr = addrspacecast <2 x ptr> undef to <2 x ptr addrspace(3)>
+ %v3i16ptr = addrspacecast <3 x ptr> undef to <3 x ptr addrspace(3)>
+ %v4i16ptr = addrspacecast <4 x ptr> undef to <4 x ptr addrspace(3)>
+ %v32i16ptr = addrspacecast <32 x ptr> undef to <32 x ptr addrspace(3)>
+ %i32ptr = addrspacecast ptr undef to ptr addrspace(3)
+ %v2i32ptr = addrspacecast <2 x ptr> undef to <2 x ptr addrspace(3)>
+ %v3i32ptr = addrspacecast <3 x ptr> undef to <3 x ptr addrspace(3)>
+ %v4i32ptr = addrspacecast <4 x ptr> undef to <4 x ptr addrspace(3)>
+ %v32i32ptr = addrspacecast <32 x ptr> undef to <32 x ptr addrspace(3)>
+ %i64ptr = addrspacecast ptr undef to ptr addrspace(3)
+ %v2i64ptr = addrspacecast <2 x ptr> undef to <2 x ptr addrspace(3)>
+ %v3i64ptr = addrspacecast <3 x ptr> undef to <3 x ptr addrspace(3)>
+ %v4i64ptr = addrspacecast <4 x ptr> undef to <4 x ptr addrspace(3)>
+ %v32i64ptr = addrspacecast <32 x ptr> undef to <32 x ptr addrspace(3)>
ret void
}
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck --check-prefixes=ALL-SIZE %s
; END.
-define amdgpu_kernel void @test_br_cost(i32 addrspace(1)* %vaddr, i32 %b) #0 {
+define amdgpu_kernel void @test_br_cost(ptr addrspace(1) %vaddr, i32 %b) #0 {
; ALL-LABEL: 'test_br_cost'
; ALL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: br i1 undef, label %bb1, label %bb2
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: br label %bb2
; GEP cost checks. After this change only the two giant-index GEPs remain;
; both are expected to cost 1 under both check prefixes.
define void @test_geps() {
; ALL-LABEL: 'test_geps'
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* undef, i32 0
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* undef, i32 0
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i32, i32* undef, i32 0
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i64, i64* undef, i32 0
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = getelementptr inbounds float, float* undef, i32 0
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds double, double* undef, i32 0
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, ptr undef, i64 9223372036854775807
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep1 = getelementptr inbounds i8, ptr undef, i128 295147905179352825855
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; ALL-SIZE-LABEL: 'test_geps'
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* undef, i32 0
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* undef, i32 0
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i32, i32* undef, i32 0
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i64, i64* undef, i32 0
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = getelementptr inbounds float, float* undef, i32 0
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds double, double* undef, i32 0
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807
-; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, ptr undef, i64 9223372036854775807
+; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep1 = getelementptr inbounds i8, ptr undef, i128 295147905179352825855
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %a0 = getelementptr inbounds i8, i8* undef, i32 0
- %a1 = getelementptr inbounds i16, i16* undef, i32 0
- %a2 = getelementptr inbounds i32, i32* undef, i32 0
- %a3 = getelementptr inbounds i64, i64* undef, i32 0
- %a4 = getelementptr inbounds float, float* undef, i32 0
- %a5 = getelementptr inbounds double, double* undef, i32 0
- %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
- %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
- %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
- %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
- %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
- %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
; Check that we handle outlandishly large GEPs properly. This is unlikely to
; be a valid pointer, but LLVM still generates GEPs like this sometimes in
; dead code.
; This GEP has index INT64_MAX, which is cost 1.
- %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807
+ %giant_gep0 = getelementptr inbounds i8, ptr undef, i64 9223372036854775807
; This GEP index (2^68 - 1) wraps around to -1 when truncated to 64 bits; the
; check lines above expect cost 1 for it on this target.
- %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855
+ %giant_gep1 = getelementptr inbounds i8, ptr undef, i128 295147905179352825855
ret void
}
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s --check-prefix=CHECK
; Check that an i128 load whose only use is a trunc to i32 costs 1, i.e. it is
; costed like a register-sized load; the trunc itself is expected to be free.
-define i32 @loadUnusualIntegerWithTrunc(i128* %ptr) {
+define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, i128* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
%trunc = trunc i128 %out to i32
ret i32 %trunc
}
; Counterpart of the truncating case: the full i128 value is returned, and the
; expected cost of the load is 2 rather than 1.
-define i128 @loadUnusualInteger(i128* %ptr) {
+define i128 @loadUnusualInteger(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualInteger'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, i128* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
ret i128 %out
}
; Named aliases for the 4-element i32 and i64 vector types.
%T432 = type <4 x i32>
%T464 = type <4 x i64>
; Baseline case with no widening or narrowing: load two %T432 (<4 x i32>)
; vectors, add them, and store the sum. The COST check expects the add to
; cost 1; the ASM checks look for vld1.64 loads, vadd.i32, and a vst1.64 store.
-define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
+define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST: function 'direct'
- %v0 = load %T432, %T432* %loadaddr
+ %v0 = load %T432, ptr %loadaddr
; ASM: vld1.64
- %v1 = load %T432, %T432* %loadaddr2
+ %v1 = load %T432, ptr %loadaddr2
; ASM: vld1.64
%r3 = add %T432 %v0, %v1
; COST: cost of 1 for instruction: {{.*}} add <4 x i32>
; ASM: vadd.i32
- store %T432 %r3, %T432* %storeaddr
+ store %T432 %r3, ptr %storeaddr
; ASM: vst1.64
ret void
}
; Signed widening add: both %T416 inputs are sign-extended to %T432 before the
; add. The COST check expects the <4 x i32> add to cost 1; the ASM checks look
; for vldr loads, vaddl.s16, and a vst1.64 store.
; NOTE(review): %T416 is declared outside this excerpt (presumably <4 x i16>).
-define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST: function 'ups1632'
- %v0 = load %T416, %T416* %loadaddr
+ %v0 = load %T416, ptr %loadaddr
; ASM: vldr
- %v1 = load %T416, %T416* %loadaddr2
+ %v1 = load %T416, ptr %loadaddr2
; ASM: vldr
%r1 = sext %T416 %v0 to %T432
%r2 = sext %T416 %v1 to %T432
%r3 = add %T432 %r1, %r2
; COST: cost of 1 for instruction: {{.*}} add <4 x i32>
; ASM: vaddl.s16
- store %T432 %r3, %T432* %storeaddr
+ store %T432 %r3, ptr %storeaddr
; ASM: vst1.64
ret void
}
; Unsigned widening add: both %T416 inputs are zero-extended to %T432 before
; the add. The COST check expects the <4 x i32> add to cost 1; the ASM checks
; look for vldr loads, vaddl.u16, and a vst1.64 store.
; NOTE(review): %T416 is declared outside this excerpt (presumably <4 x i16>).
-define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST: function 'upu1632'
- %v0 = load %T416, %T416* %loadaddr
+ %v0 = load %T416, ptr %loadaddr
; ASM: vldr
- %v1 = load %T416, %T416* %loadaddr2
+ %v1 = load %T416, ptr %loadaddr2
; ASM: vldr
%r1 = zext %T416 %v0 to %T432
%r2 = zext %T416 %v1 to %T432
%r3 = add %T432 %r1, %r2
; COST: cost of 1 for instruction: {{.*}} add <4 x i32>
; ASM: vaddl.u16
- store %T432 %r3, %T432* %storeaddr
+ store %T432 %r3, ptr %storeaddr
; ASM: vst1.64
ret void
}
; Signed widening of a result: two %T232 vectors are added first, then the sum
; is sign-extended to %T264 and stored. The COST check expects the
; <2 x i32> to <2 x i64> sext to cost 1; the ASM checks look for vldr loads,
; vadd.i32, vmovl.s32, and a vst1.64 store.
; NOTE(review): %T232 and %T264 are declared outside this excerpt.
-define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST: function 'ups3264'
- %v0 = load %T232, %T232* %loadaddr
+ %v0 = load %T232, ptr %loadaddr
; ASM: vldr
- %v1 = load %T232, %T232* %loadaddr2
+ %v1 = load %T232, ptr %loadaddr2
; ASM: vldr
%r3 = add %T232 %v0, %v1
; ASM: vadd.i32
%st = sext %T232 %r3 to %T264
; ASM: vmovl.s32
; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64>
- store %T264 %st, %T264* %storeaddr
+ store %T264 %st, ptr %storeaddr
; ASM: vst1.64
ret void
}
; Unsigned widening of a result: two %T232 vectors are added first, then the
; sum is zero-extended to %T264 and stored. The COST check expects the
; <2 x i32> to <2 x i64> zext to cost 1; the ASM checks look for vldr loads,
; vadd.i32, vmovl.u32, and a vst1.64 store.
; NOTE(review): %T232 and %T264 are declared outside this excerpt.
-define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST: function 'upu3264'
- %v0 = load %T232, %T232* %loadaddr
+ %v0 = load %T232, ptr %loadaddr
; ASM: vldr
- %v1 = load %T232, %T232* %loadaddr2
+ %v1 = load %T232, ptr %loadaddr2
; ASM: vldr
%r3 = add %T232 %v0, %v1
; ASM: vadd.i32
%st = zext %T232 %r3 to %T264
; ASM: vmovl.u32
; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64>
- store %T264 %st, %T264* %storeaddr
+ store %T264 %st, ptr %storeaddr
; ASM: vst1.64
ret void
}
; Narrowing of a result: two %T432 vectors are added, then the sum is
; truncated to %T416 and stored. The COST check expects the
; <4 x i32> to <4 x i16> trunc to cost 1; the ASM checks look for vld1.64
; loads, vadd.i32, vmovn.i32, and a vstr store.
; NOTE(review): %T416 is declared outside this excerpt.
-define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
+define void @dn3216(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST: function 'dn3216'
- %v0 = load %T432, %T432* %loadaddr
+ %v0 = load %T432, ptr %loadaddr
; ASM: vld1.64
- %v1 = load %T432, %T432* %loadaddr2
+ %v1 = load %T432, ptr %loadaddr2
; ASM: vld1.64
%r3 = add %T432 %v0, %v1
; ASM: vadd.i32
%st = trunc %T432 %r3 to %T416
; ASM: vmovn.i32
; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16>
- store %T416 %st, %T416* %storeaddr
+ store %T416 %st, ptr %storeaddr
; ASM: vstr
ret void
}
define i32 @load_extends() {
; CHECK-NEON-RECIP-LABEL: 'load_extends'
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, ptr undef, align 1
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, ptr undef, align 2
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, ptr undef, align 2
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, ptr undef, align 8
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'load_extends'
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, ptr undef, align 1
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, ptr undef, align 2
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %loadv2i8 = load <2 x i8>, ptr undef, align 2
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = load <4 x i8>, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i8 = load <8 x i8>, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv16i8 = load <16 x i8>, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %loadv2i16 = load <2 x i16>, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i16 = load <4 x i16>, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i16 = load <8 x i16>, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %loadv2i32 = load <2 x i32>, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i32 = load <4 x i32>, ptr undef, align 8
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'load_extends'
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, ptr undef, align 1
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, ptr undef, align 2
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, ptr undef, align 2
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16i8 = load <16 x i8>, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i16 = load <8 x i16>, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i32 = load <4 x i32>, ptr undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'load_extends'
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, ptr undef, align 1
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, ptr undef, align 2
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i8 = load <2 x i8>, ptr undef, align 2
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i8 = load <4 x i8>, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i8 = load <8 x i8>, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16i8 = load <16 x i8>, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i16 = load <2 x i16>, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i16 = load <4 x i16>, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8i16 = load <8 x i16>, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2i32 = load <2 x i32>, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4i32 = load <4 x i32>, ptr undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'load_extends'
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, ptr undef, align 1
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, ptr undef, align 2
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, ptr undef, align 2
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, ptr undef, align 8
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'load_extends'
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, ptr undef, align 8
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'load_extends'
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, ptr undef, align 1
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, ptr undef, align 2
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, ptr undef, align 2
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, ptr undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'load_extends'
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, ptr undef, align 1
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, ptr undef, align 2
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, ptr undef, align 2
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, ptr undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'load_extends'
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, ptr undef, align 1
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, ptr undef, align 2
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = load <2 x i8>, ptr undef, align 2
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = load <4 x i8>, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = load <8 x i8>, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = load <16 x i8>, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i16 = load <2 x i16>, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = load <4 x i16>, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = load <8 x i16>, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i32 = load <2 x i32>, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = load <4 x i32>, ptr undef, align 8
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext i8 %loadi8 to i32
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
- %loadi8 = load i8, i8* undef
- %loadi16 = load i16, i16* undef
- %loadi32 = load i32, i32* undef
- %loadv2i8 = load <2 x i8>, <2 x i8>* undef
- %loadv4i8 = load <4 x i8>, <4 x i8>* undef
- %loadv8i8 = load <8 x i8>, <8 x i8>* undef
- %loadv16i8 = load <16 x i8>, <16 x i8>* undef
- %loadv2i16 = load <2 x i16>, <2 x i16>* undef
- %loadv4i16 = load <4 x i16>, <4 x i16>* undef
- %loadv8i16 = load <8 x i16>, <8 x i16>* undef
- %loadv2i32 = load <2 x i32>, <2 x i32>* undef
- %loadv4i32 = load <4 x i32>, <4 x i32>* undef
+ %loadi8 = load i8, ptr undef
+ %loadi16 = load i16, ptr undef
+ %loadi32 = load i32, ptr undef
+ %loadv2i8 = load <2 x i8>, ptr undef
+ %loadv4i8 = load <4 x i8>, ptr undef
+ %loadv8i8 = load <8 x i8>, ptr undef
+ %loadv16i8 = load <16 x i8>, ptr undef
+ %loadv2i16 = load <2 x i16>, ptr undef
+ %loadv4i16 = load <4 x i16>, ptr undef
+ %loadv8i16 = load <8 x i16>, ptr undef
+ %loadv2i32 = load <2 x i32>, ptr undef
+ %loadv4i32 = load <4 x i32>, ptr undef
%r0 = sext i8 %loadi8 to i16
%r1 = zext i8 %loadi8 to i16
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, ptr undef, align 1
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, ptr undef, align 1
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, ptr undef, align 1
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, ptr undef, align 2
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, ptr undef, align 2
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2816, ptr undef, align 2
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2832, ptr undef, align 2
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2864, ptr undef, align 2
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4816, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4832, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4864, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21632, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21664, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, ptr undef, align 8
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'store_trunc'
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, ptr undef, align 1
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, ptr undef, align 1
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, ptr undef, align 1
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, ptr undef, align 2
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, ptr undef, align 2
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> %v2816, ptr undef, align 2
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> %v2832, ptr undef, align 2
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> %v2864, ptr undef, align 2
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4816, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4832, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4864, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8816, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8832, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8> %v8864, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16816, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16832, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16864, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> %v21632, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> %v21664, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i16> %v41632, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i16> %v41664, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81632, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81664, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i32> %v23264, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> %v43264, ptr undef, align 8
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'store_trunc'
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, ptr undef, align 1
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, ptr undef, align 1
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, ptr undef, align 1
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, ptr undef, align 2
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, ptr undef, align 2
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2816, ptr undef, align 2
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2832, ptr undef, align 2
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2864, ptr undef, align 2
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4816, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4832, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4864, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8816, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8832, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8864, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16816, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16832, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16864, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21632, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21664, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41632, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41664, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81632, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81664, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> %v23264, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> %v43264, ptr undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'store_trunc'
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, ptr undef, align 1
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, ptr undef, align 1
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, ptr undef, align 1
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, ptr undef, align 2
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, ptr undef, align 2
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2816, ptr undef, align 2
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2832, ptr undef, align 2
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> %v2864, ptr undef, align 2
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4816, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4832, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i8> %v4864, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8816, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8832, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i8> %v8864, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16816, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16832, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> %v16864, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21632, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> %v21664, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41632, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i16> %v41664, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81632, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81664, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> %v23264, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> %v43264, ptr undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'store_trunc'
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, ptr undef, align 1
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, ptr undef, align 1
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, ptr undef, align 1
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, ptr undef, align 2
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, ptr undef, align 2
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2816, ptr undef, align 2
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2832, ptr undef, align 2
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> %v2864, ptr undef, align 2
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4816, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4832, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: store <4 x i8> %v4864, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21632, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> %v21664, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, ptr undef, align 8
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'store_trunc'
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2816, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2832, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2864, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4816, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4832, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4864, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, ptr undef, align 8
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'store_trunc'
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, ptr undef, align 1
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, ptr undef, align 1
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, ptr undef, align 1
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, ptr undef, align 2
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, ptr undef, align 2
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2816, ptr undef, align 2
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2832, ptr undef, align 2
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2864, ptr undef, align 2
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4816, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4832, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4864, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, ptr undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'store_trunc'
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, ptr undef, align 1
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, ptr undef, align 1
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, ptr undef, align 1
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, ptr undef, align 2
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, ptr undef, align 2
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2816, ptr undef, align 2
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2832, ptr undef, align 2
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2864, ptr undef, align 2
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4816, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4832, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4864, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, ptr undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'store_trunc'
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, i8* undef, align 1
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, i8* undef, align 1
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, i8* undef, align 1
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, <8 x i8>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, <8 x i8>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, <8 x i8>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i816, ptr undef, align 1
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i832, ptr undef, align 1
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 %i864, ptr undef, align 1
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, ptr undef, align 2
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, ptr undef, align 2
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2816, ptr undef, align 2
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2832, ptr undef, align 2
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> %v2864, ptr undef, align 2
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4816, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4832, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %v4864, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8816, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8832, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8864, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16816, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16832, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16864, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21632, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> %v21664, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41632, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v41664, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81632, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, ptr undef, align 8
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%i816 = trunc i16 undef to i8
%v23264 = trunc <2 x i64> undef to <2 x i32>
%v43264 = trunc <4 x i64> undef to <4 x i32>
- store i8 %i816, i8* undef
- store i8 %i832, i8* undef
- store i8 %i864, i8* undef
- store i16 %i1632, i16* undef
- store i16 %i1664, i16* undef
- store i32 %i3264, i32* undef
- store <2 x i8> %v2816, <2 x i8>* undef
- store <2 x i8> %v2832, <2 x i8>* undef
- store <2 x i8> %v2864, <2 x i8>* undef
- store <4 x i8> %v4816, <4 x i8>* undef
- store <4 x i8> %v4832, <4 x i8>* undef
- store <4 x i8> %v4864, <4 x i8>* undef
- store <8 x i8> %v8816, <8 x i8>* undef
- store <8 x i8> %v8832, <8 x i8>* undef
- store <8 x i8> %v8864, <8 x i8>* undef
- store <16 x i8> %v16816, <16 x i8>* undef
- store <16 x i8> %v16832, <16 x i8>* undef
- store <16 x i8> %v16864, <16 x i8>* undef
- store <2 x i16> %v21632, <2 x i16>* undef
- store <2 x i16> %v21664, <2 x i16>* undef
- store <4 x i16> %v41632, <4 x i16>* undef
- store <4 x i16> %v41664, <4 x i16>* undef
- store <8 x i16> %v81632, <8 x i16>* undef
- store <8 x i16> %v81664, <8 x i16>* undef
- store <2 x i32> %v23264, <2 x i32>* undef
- store <4 x i32> %v43264, <4 x i32>* undef
+ store i8 %i816, ptr undef
+ store i8 %i832, ptr undef
+ store i8 %i864, ptr undef
+ store i16 %i1632, ptr undef
+ store i16 %i1664, ptr undef
+ store i32 %i3264, ptr undef
+ store <2 x i8> %v2816, ptr undef
+ store <2 x i8> %v2832, ptr undef
+ store <2 x i8> %v2864, ptr undef
+ store <4 x i8> %v4816, ptr undef
+ store <4 x i8> %v4832, ptr undef
+ store <4 x i8> %v4864, ptr undef
+ store <8 x i8> %v8816, ptr undef
+ store <8 x i8> %v8832, ptr undef
+ store <8 x i8> %v8864, ptr undef
+ store <16 x i8> %v16816, ptr undef
+ store <16 x i8> %v16832, ptr undef
+ store <16 x i8> %v16864, ptr undef
+ store <2 x i16> %v21632, ptr undef
+ store <2 x i16> %v21664, ptr undef
+ store <4 x i16> %v41632, ptr undef
+ store <4 x i16> %v41664, ptr undef
+ store <8 x i16> %v81632, ptr undef
+ store <8 x i16> %v81664, ptr undef
+ store <2 x i32> %v23264, ptr undef
+ store <4 x i32> %v43264, ptr undef
ret i32 undef
}
define i32 @load_fpextends() {
; CHECK-NEON-RECIP-LABEL: 'load_fpextends'
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, ptr undef, align 2
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, ptr undef, align 32
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f32 = load <8 x float>, ptr undef, align 32
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'load_fpextends'
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, ptr undef, align 2
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %loadv2f16 = load <2 x half>, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv4f16 = load <4 x half>, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f16 = load <8 x half>, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv16f16 = load <16 x half>, ptr undef, align 32
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %loadv2f32 = load <2 x float>, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f32 = load <4 x float>, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv8f32 = load <8 x float>, ptr undef, align 32
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'load_fpextends'
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, ptr undef, align 2
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, ptr undef, align 32
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f32 = load <2 x float>, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f32 = load <4 x float>, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f32 = load <8 x float>, ptr undef, align 32
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'load_fpextends'
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, ptr undef, align 2
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, ptr undef, align 32
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f32 = load <2 x float>, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f32 = load <4 x float>, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f32 = load <8 x float>, ptr undef, align 32
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'load_fpextends'
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, ptr undef, align 2
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv2f16 = load <2 x half>, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16 = load <4 x half>, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %loadv8f16 = load <8 x half>, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv16f16 = load <16 x half>, ptr undef, align 32
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f32 = load <8 x float>, ptr undef, align 32
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'load_fpextends'
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, ptr undef, align 32
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, ptr undef, align 32
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'load_fpextends'
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, ptr undef, align 2
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, ptr undef, align 32
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, ptr undef, align 32
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'load_fpextends'
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, ptr undef, align 2
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, ptr undef, align 32
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, ptr undef, align 32
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'load_fpextends'
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, ptr undef, align 2
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = load <2 x half>, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16 = load <4 x half>, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = load <8 x half>, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16f16 = load <16 x half>, ptr undef, align 32
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f32 = load <2 x float>, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = load <4 x float>, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f32 = load <8 x float>, ptr undef, align 32
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r1 = fpext half %loadf16 to float
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r2 = fpext half %loadf16 to double
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = fpext float %loadf32 to double
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
- %loadf16 = load half, half* undef
- %loadf32 = load float, float* undef
- %loadv2f16 = load <2 x half>, <2 x half>* undef
- %loadv4f16 = load <4 x half>, <4 x half>* undef
- %loadv8f16 = load <8 x half>, <8 x half>* undef
- %loadv16f16 = load <16 x half>, <16 x half>* undef
- %loadv2f32 = load <2 x float>, <2 x float>* undef
- %loadv4f32 = load <4 x float>, <4 x float>* undef
- %loadv8f32 = load <8 x float>, <8 x float>* undef
+ %loadf16 = load half, ptr undef
+ %loadf32 = load float, ptr undef
+ %loadv2f16 = load <2 x half>, ptr undef
+ %loadv4f16 = load <4 x half>, ptr undef
+ %loadv8f16 = load <8 x half>, ptr undef
+ %loadv16f16 = load <16 x half>, ptr undef
+ %loadv2f32 = load <2 x float>, ptr undef
+ %loadv4f32 = load <4 x float>, ptr undef
+ %loadv8f32 = load <8 x float>, ptr undef
%r1 = fpext half %loadf16 to float
%r2 = fpext half %loadf16 to double
%v10 = fpext <4 x float> %loadv4f32 to <4 x double>
%v11 = fpext <8 x float> %loadv8f32 to <8 x double>
- %loadv4f16ou = load <4 x half>, <4 x half>* undef
+ %loadv4f16ou = load <4 x half>, ptr undef
%v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
ret i32 undef
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, ptr undef, align 2
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, ptr undef, align 2
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21632, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, ptr undef, align 4
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, ptr undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, ptr undef, align 8
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'load_fptrunc'
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, ptr undef, align 2
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, ptr undef, align 2
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x half> %v21632, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x half> %v21664, ptr undef, align 4
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x half> %v41632, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <4 x half> %v41664, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81632, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81664, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x float> %v23264, ptr undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> %v43264, ptr undef, align 8
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'load_fptrunc'
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, ptr undef, align 2
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, ptr undef, align 2
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21632, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, ptr undef, align 4
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> %v23264, ptr undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> %v43264, ptr undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'load_fptrunc'
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, ptr undef, align 2
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, ptr undef, align 2
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21632, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, ptr undef, align 4
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> %v23264, ptr undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> %v43264, ptr undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'load_fptrunc'
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, ptr undef, align 2
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, ptr undef, align 2
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21632, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x half> %v21664, ptr undef, align 4
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41632, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x half> %v41664, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81632, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, ptr undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, ptr undef, align 8
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'load_fptrunc'
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21632, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, ptr undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, ptr undef, align 8
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'load_fptrunc'
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, ptr undef, align 2
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, ptr undef, align 2
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21632, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, ptr undef, align 4
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, ptr undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, ptr undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'load_fptrunc'
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, ptr undef, align 2
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, ptr undef, align 2
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21632, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, ptr undef, align 4
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, ptr undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, ptr undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'load_fptrunc'
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, ptr undef, align 2
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, ptr undef, align 2
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21632, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x half> %v21664, ptr undef, align 4
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41632, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x half> %v41664, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81632, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, ptr undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, ptr undef, align 8
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%i1632 = fptrunc float undef to half
%v23264 = fptrunc <2 x double> undef to <2 x float>
%v43264 = fptrunc <4 x double> undef to <4 x float>
- store half %i1632, half* undef
- store half %i1664, half* undef
- store float %i3264, float* undef
- store <2 x half> %v21632, <2 x half>* undef
- store <2 x half> %v21664, <2 x half>* undef
- store <4 x half> %v41632, <4 x half>* undef
- store <4 x half> %v41664, <4 x half>* undef
- store <8 x half> %v81632, <8 x half>* undef
- store <8 x half> %v81664, <8 x half>* undef
- store <2 x float> %v23264, <2 x float>* undef
- store <4 x float> %v43264, <4 x float>* undef
+ store half %i1632, ptr undef
+ store half %i1664, ptr undef
+ store float %i3264, ptr undef
+ store <2 x half> %v21632, ptr undef
+ store <2 x half> %v21664, ptr undef
+ store <4 x half> %v41632, ptr undef
+ store <4 x half> %v41664, ptr undef
+ store <8 x half> %v81632, ptr undef
+ store <8 x half> %v81664, ptr undef
+ store <2 x float> %v23264, ptr undef
+ store <4 x float> %v43264, ptr undef
ret i32 undef
}
define i32 @maskedload_extends() {
; CHECK-NEON-RECIP-LABEL: 'maskedload_extends'
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'maskedload_extends'
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_extends'
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'maskedload_extends'
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'maskedload_extends'
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'maskedload_extends'
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'maskedload_extends'
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'maskedload_extends'
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'maskedload_extends'
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x i32> undef)
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
- %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
- %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 1, <4 x i1> undef, <4 x i8> undef)
- %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
- %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
- %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 2, <2 x i1> undef, <2 x i16> undef)
- %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 2, <4 x i1> undef, <4 x i16> undef)
- %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 2, <8 x i1> undef, <8 x i16> undef)
- %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 4, <2 x i1> undef, <2 x i32> undef)
- %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+ %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+ %loadv4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+ %loadv8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+ %loadv16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+ %loadv2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+ %loadv4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+ %loadv8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+ %loadv2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+ %loadv4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x i32> undef)
%v2816s = sext <2 x i8> %loadv2i8 to <2 x i16>
%v2816u = zext <2 x i8> %loadv2i8 to <2 x i16>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2816, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2832, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2864, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4816, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4832, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4864, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8816, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8832, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8864, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16816, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16832, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16864, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'maskedstore_trunc'
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2816, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2832, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2864, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4816, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4832, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4864, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8816, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8832, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8864, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16816, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16832, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16864, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedstore_trunc'
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2816, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2832, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2864, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4816, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4832, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4864, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8816, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8832, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8864, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16816, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16832, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16864, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'maskedstore_trunc'
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2816, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2832, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2864, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4816, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4832, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4864, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8816, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8832, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8864, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16816, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16832, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16864, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'maskedstore_trunc'
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2816, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2832, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2864, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4816, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4832, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4864, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8816, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8832, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8864, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16816, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16832, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16864, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'maskedstore_trunc'
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2816, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2832, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2864, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4816, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4832, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4864, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8816, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8832, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8864, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16816, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16832, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16864, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'maskedstore_trunc'
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2816, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2832, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2864, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4816, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4832, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4864, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8816, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8832, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8864, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16816, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16832, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16864, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'maskedstore_trunc'
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2816, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2832, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2864, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4816, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4832, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4864, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8816, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8832, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8864, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16816, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16832, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16864, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'maskedstore_trunc'
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v81664 = trunc <8 x i64> undef to <8 x i16>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = trunc <2 x i64> undef to <2 x i32>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v43264 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2816, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2832, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2864, ptr undef, i32 1, <2 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4816, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4832, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4864, ptr undef, i32 1, <4 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8816, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8832, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8864, ptr undef, i32 1, <8 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16816, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16832, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16864, ptr undef, i32 1, <16 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%v2816 = trunc <2 x i16> undef to <2 x i8>
%v23264 = trunc <2 x i64> undef to <2 x i32>
%v43264 = trunc <4 x i64> undef to <4 x i32>
- call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2816, <2 x i8>* undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2832, <2 x i8>* undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v2i8.p0v2i8(<2 x i8> %v2864, <2 x i8>* undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4816, <4 x i8>* undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4832, <4 x i8>* undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %v4864, <4 x i8>* undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8816, <8 x i8>* undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8832, <8 x i8>* undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %v8864, <8 x i8>* undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16816, <16 x i8>* undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16832, <16 x i8>* undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %v16864, <16 x i8>* undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21632, <2 x i16>* undef, i32 2, <2 x i1> undef)
- call void @llvm.masked.store.v2i16.p0v2i16(<2 x i16> %v21664, <2 x i16>* undef, i32 2, <2 x i1> undef)
- call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41632, <4 x i16>* undef, i32 2, <4 x i1> undef)
- call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %v41664, <4 x i16>* undef, i32 2, <4 x i1> undef)
- call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81632, <8 x i16>* undef, i32 2, <8 x i1> undef)
- call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
- call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
+ call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2816, ptr undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2832, ptr undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.store.v2i8.p0(<2 x i8> %v2864, ptr undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4816, ptr undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4832, ptr undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.store.v4i8.p0(<4 x i8> %v4864, ptr undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8816, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8832, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v8i8.p0(<8 x i8> %v8864, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16816, ptr undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16832, ptr undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.store.v16i8.p0(<16 x i8> %v16864, ptr undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21632, ptr undef, i32 2, <2 x i1> undef)
+ call void @llvm.masked.store.v2i16.p0(<2 x i16> %v21664, ptr undef, i32 2, <2 x i1> undef)
+ call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41632, ptr undef, i32 2, <4 x i1> undef)
+ call void @llvm.masked.store.v4i16.p0(<4 x i16> %v41664, ptr undef, i32 2, <4 x i1> undef)
+ call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81632, ptr undef, i32 2, <8 x i1> undef)
+ call void @llvm.masked.store.v8i16.p0(<8 x i16> %v81664, ptr undef, i32 2, <8 x i1> undef)
+ call void @llvm.masked.store.v2i32.p0(<2 x i32> %v23264, ptr undef, i32 4, <2 x i1> undef)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %v43264, ptr undef, i32 4, <4 x i1> undef)
ret i32 undef
}
define i32 @maskedload_fpextends() {
; CHECK-NEON-RECIP-LABEL: 'maskedload_fpextends'
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x half> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x half> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x half> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0(ptr undef, i32 2, <16 x i1> undef, <16 x half> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x float> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x float> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'maskedload_fpextends'
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x half> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x half> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x half> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0(ptr undef, i32 2, <16 x i1> undef, <16 x half> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x float> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x float> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_fpextends'
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x half> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x half> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x half> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0(ptr undef, i32 2, <16 x i1> undef, <16 x half> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x float> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x float> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'maskedload_fpextends'
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x half> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x half> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x half> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0(ptr undef, i32 2, <16 x i1> undef, <16 x half> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x float> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x float> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'maskedload_fpextends'
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x half> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x half> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x half> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0(ptr undef, i32 2, <16 x i1> undef, <16 x half> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x float> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x float> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'maskedload_fpextends'
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x half> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x half> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x half> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0(ptr undef, i32 2, <16 x i1> undef, <16 x half> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x float> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x float> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'maskedload_fpextends'
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x half> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x half> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x half> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0(ptr undef, i32 2, <16 x i1> undef, <16 x half> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x float> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x float> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'maskedload_fpextends'
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x half> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x half> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x half> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0(ptr undef, i32 2, <16 x i1> undef, <16 x half> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x float> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x float> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'maskedload_fpextends'
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x half> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x half> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x half> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0(ptr undef, i32 2, <16 x i1> undef, <16 x half> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x float> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x float> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 4, <8 x i1> undef, <8 x float> undef)
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v1 = fpext <2 x half> %loadv2f16 to <2 x float>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2 = fpext <4 x half> %loadv4f16 to <4 x float>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v3 = fpext <8 x half> %loadv8f16 to <8 x float>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv4f16ou = load <4 x half>, ptr undef, align 8
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
- %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
- %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 2, <4 x i1> undef, <4 x half> undef)
- %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 2, <8 x i1> undef, <8 x half> undef)
- %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>* undef, i32 2, <16 x i1> undef, <16 x half> undef)
- %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 4, <2 x i1> undef, <2 x float> undef)
- %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 4, <4 x i1> undef, <4 x float> undef)
- %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> undef, <8 x float> undef)
+ %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 2, <2 x i1> undef, <2 x half> undef)
+ %loadv4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 2, <4 x i1> undef, <4 x half> undef)
+ %loadv8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 2, <8 x i1> undef, <8 x half> undef)
+ %loadv16f16 = call <16 x half> @llvm.masked.load.v16f16.p0(ptr undef, i32 2, <16 x i1> undef, <16 x half> undef)
+ %loadv2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 4, <2 x i1> undef, <2 x float> undef)
+ %loadv4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 4, <4 x i1> undef, <4 x float> undef)
+ %loadv8f32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 4, <8 x i1> undef, <8 x float> undef)
%v1 = fpext <2 x half> %loadv2f16 to <2 x float>
%v2 = fpext <4 x half> %loadv4f16 to <4 x float>
%v10 = fpext <4 x float> %loadv4f32 to <4 x double>
%v11 = fpext <8 x float> %loadv8f32 to <8 x double>
- %loadv4f16ou = load <4 x half>, <4 x half>* undef
+ %loadv4f16ou = load <4 x half>, ptr undef
%v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
ret i32 undef
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-RECIP-LABEL: 'maskedload_fptrunc'
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_fptrunc'
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'maskedload_fptrunc'
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'maskedload_fptrunc'
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'maskedload_fptrunc'
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'maskedload_fptrunc'
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'maskedload_fptrunc'
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'maskedload_fptrunc'
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v81664 = fptrunc <8 x double> undef to <8 x half>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264 = fptrunc <2 x double> undef to <2 x float>
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21632, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f16.p0(<2 x half> %v21664, ptr undef, i32 2, <2 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41632, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f16.p0(<4 x half> %v41664, ptr undef, i32 2, <4 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81632, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v8f16.p0(<8 x half> %v81664, ptr undef, i32 2, <8 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %v23264, ptr undef, i32 4, <2 x i1> undef)
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> %v43264, ptr undef, i32 4, <4 x i1> undef)
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%v21632 = fptrunc <2 x float> undef to <2 x half>
%v23264 = fptrunc <2 x double> undef to <2 x float>
%v43264 = fptrunc <4 x double> undef to <4 x float>
- call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21632, <2 x half>* undef, i32 2, <2 x i1> undef)
- call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %v21664, <2 x half>* undef, i32 2, <2 x i1> undef)
- call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41632, <4 x half>* undef, i32 2, <4 x i1> undef)
- call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %v41664, <4 x half>* undef, i32 2, <4 x i1> undef)
- call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81632, <8 x half>* undef, i32 2, <8 x i1> undef)
- call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
- call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
- call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
+ call void @llvm.masked.store.v2f16.p0(<2 x half> %v21632, ptr undef, i32 2, <2 x i1> undef)
+ call void @llvm.masked.store.v2f16.p0(<2 x half> %v21664, ptr undef, i32 2, <2 x i1> undef)
+ call void @llvm.masked.store.v4f16.p0(<4 x half> %v41632, ptr undef, i32 2, <4 x i1> undef)
+ call void @llvm.masked.store.v4f16.p0(<4 x half> %v41664, ptr undef, i32 2, <4 x i1> undef)
+ call void @llvm.masked.store.v8f16.p0(<8 x half> %v81632, ptr undef, i32 2, <8 x i1> undef)
+ call void @llvm.masked.store.v8f16.p0(<8 x half> %v81664, ptr undef, i32 2, <8 x i1> undef)
+ call void @llvm.masked.store.v2f32.p0(<2 x float> %v23264, ptr undef, i32 4, <2 x i1> undef)
+ call void @llvm.masked.store.v4f32.p0(<4 x float> %v43264, ptr undef, i32 4, <4 x i1> undef)
ret i32 undef
}
-declare <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>*, i32, <2 x i1>, <2 x i8>)
-declare <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>*, i32, <2 x i1>, <2 x i16>)
-declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
-declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>)
-declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>)
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
-declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
-declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
-declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
-declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
-declare <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)
-declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
-declare void @llvm.masked.store.v2i8.p0v2i8(<2 x i8>, <2 x i8>*, i32 immarg, <2 x i1>)
-declare void @llvm.masked.store.v2i16.p0v2i16(<2 x i16>, <2 x i16>*, i32 immarg, <2 x i1>)
-declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32 immarg, <2 x i1>)
-declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32 immarg, <4 x i1>)
-declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32 immarg, <4 x i1>)
-declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
-declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32 immarg, <8 x i1>)
-declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
-declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32 immarg, <8 x i1>)
-declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
-declare void @llvm.masked.store.v16i16.p0v16i16(<16 x i16>, <16 x i16>*, i32 immarg, <16 x i1>)
-declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32 immarg, <16 x i1>)
+declare <2 x i8> @llvm.masked.load.v2i8.p0(ptr, i32, <2 x i1>, <2 x i8>)
+declare <2 x i16> @llvm.masked.load.v2i16.p0(ptr, i32, <2 x i1>, <2 x i16>)
+declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>)
+declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32, <4 x i1>, <4 x i8>)
+declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>)
+declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>)
+declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>)
+declare <8 x i32> @llvm.masked.load.v8i32.p0(ptr, i32, <8 x i1>, <8 x i32>)
+declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>)
+declare <16 x i16> @llvm.masked.load.v16i16.p0(ptr, i32, <16 x i1>, <16 x i16>)
+declare <16 x i32> @llvm.masked.load.v16i32.p0(ptr, i32, <16 x i1>, <16 x i32>)
+declare void @llvm.masked.store.v2i8.p0(<2 x i8>, ptr, i32 immarg, <2 x i1>)
+declare void @llvm.masked.store.v2i16.p0(<2 x i16>, ptr, i32 immarg, <2 x i1>)
+declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32 immarg, <2 x i1>)
+declare void @llvm.masked.store.v4i8.p0(<4 x i8>, ptr, i32 immarg, <4 x i1>)
+declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32 immarg, <4 x i1>)
+declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
+declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32 immarg, <8 x i1>)
+declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>)
+declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32 immarg, <8 x i1>)
+declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>)
+declare void @llvm.masked.store.v16i16.p0(<16 x i16>, ptr, i32 immarg, <16 x i1>)
+declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, i32 immarg, <16 x i1>)
-declare <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>*, i32, <2 x i1>, <2 x half>)
-declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
-declare <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>*, i32, <4 x i1>, <4 x half>)
-declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
-declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>)
-declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
-declare <16 x half> @llvm.masked.load.v16f16.p0v16f16(<16 x half>*, i32, <16 x i1>, <16 x half>)
-declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
-declare void @llvm.masked.store.v2f16.p0v2f16(<2 x half>, <2 x half>*, i32 immarg, <2 x i1>)
-declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32 immarg, <2 x i1>)
-declare void @llvm.masked.store.v4f16.p0v4f16(<4 x half>, <4 x half>*, i32 immarg, <4 x i1>)
-declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
-declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32 immarg, <8 x i1>)
-declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32 immarg, <8 x i1>)
-declare void @llvm.masked.store.v16f16.p0v16f16(<16 x half>, <16 x half>*, i32 immarg, <16 x i1>)
-declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32 immarg, <16 x i1>)
+declare <2 x half> @llvm.masked.load.v2f16.p0(ptr, i32, <2 x i1>, <2 x half>)
+declare <2 x float> @llvm.masked.load.v2f32.p0(ptr, i32, <2 x i1>, <2 x float>)
+declare <4 x half> @llvm.masked.load.v4f16.p0(ptr, i32, <4 x i1>, <4 x half>)
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)
+declare <8 x half> @llvm.masked.load.v8f16.p0(ptr, i32, <8 x i1>, <8 x half>)
+declare <8 x float> @llvm.masked.load.v8f32.p0(ptr, i32, <8 x i1>, <8 x float>)
+declare <16 x half> @llvm.masked.load.v16f16.p0(ptr, i32, <16 x i1>, <16 x half>)
+declare <16 x float> @llvm.masked.load.v16f32.p0(ptr, i32, <16 x i1>, <16 x float>)
+declare void @llvm.masked.store.v2f16.p0(<2 x half>, ptr, i32 immarg, <2 x i1>)
+declare void @llvm.masked.store.v2f32.p0(<2 x float>, ptr, i32 immarg, <2 x i1>)
+declare void @llvm.masked.store.v4f16.p0(<4 x half>, ptr, i32 immarg, <4 x i1>)
+declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32 immarg, <4 x i1>)
+declare void @llvm.masked.store.v8f16.p0(<8 x half>, ptr, i32 immarg, <8 x i1>)
+declare void @llvm.masked.store.v8f32.p0(<8 x float>, ptr, i32 immarg, <8 x i1>)
+declare void @llvm.masked.store.v16f16.p0(<16 x half>, ptr, i32 immarg, <16 x i1>)
+declare void @llvm.masked.store.v16f32.p0(<16 x float>, ptr, i32 immarg, <16 x i1>)
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a10 = fcmp olt <8 x half> undef, undef
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a11 = fcmp oge <4 x float> undef, undef
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq i32* undef, undef
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q = icmp eq <4 x i32*> undef, undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %q = icmp eq <4 x ptr> undef, undef
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'cmps'
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq i32* undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x i32*> undef, undef
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x ptr> undef, undef
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-RECIP-LABEL: 'cmps'
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq i32* undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x i32*> undef, undef
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x ptr> undef, undef
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-RECIP-LABEL: 'cmps'
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %a10 = fcmp olt <8 x half> undef, undef
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq i32* undef, undef
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x i32*> undef, undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; CHECK-MVE-SIZE-LABEL: 'cmps'
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq i32* undef, undef
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x i32*> undef, undef
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'cmps'
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq i32* undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x i32*> undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8M-BASE-SIZE-LABEL: 'cmps'
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq i32* undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x i32*> undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-V8R-SIZE-LABEL: 'cmps'
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq i32* undef, undef
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x i32*> undef, undef
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%a = icmp slt i8 undef, undef
%a10 = fcmp olt <8 x half> undef, undef
%a11 = fcmp oge <4 x float> undef, undef
%a12 = fcmp oge <2 x double> undef, undef
- %p = icmp eq i32* undef, undef
- %q = icmp eq <4 x i32*> undef, undef
+ %p = icmp eq ptr undef, undef
+ %q = icmp eq <4 x ptr> undef, undef
ret i32 undef
}
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c6 = icmp slt <4 x i32*> undef, undef
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-V8M-MAIN-RECIP-LABEL: 'minmax'
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x i32*> undef, undef
-; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V8M-BASE-RECIP-LABEL: 'minmax'
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x i32*> undef, undef
-; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V8R-RECIP-LABEL: 'minmax'
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x i32*> undef, undef
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-MVE-SIZE-LABEL: 'minmax'
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x i32*> undef, undef
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V8M-MAIN-SIZE-LABEL: 'minmax'
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x i32*> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V8M-BASE-SIZE-LABEL: 'minmax'
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x i32*> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V8R-SIZE-LABEL: 'minmax'
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x i32*> undef, undef
-; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%c1 = icmp slt i8 undef, undef
%s3 = select i1 %c3, i32 undef, i32 undef
%c4 = icmp slt <4 x i32> undef, undef
%s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
- %c5 = icmp slt i32* undef, undef
- %s5 = select i1 %c5, i32* undef, i32* undef
- %c6 = icmp slt <4 x i32*> undef, undef
- %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef
+ %c5 = icmp slt ptr undef, undef
+ %s5 = select i1 %c5, ptr undef, ptr undef
+ %c6 = icmp slt <4 x ptr> undef, undef
+ %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
ret void
}
ret i32 %res
}
-define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
+define i32 @simple_mul_loop(ptr %A, ptr %B, i32 %N) {
; CHECK-T1-SIZE-LABEL: 'simple_mul_loop'
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, ptr %addr.a, align 4
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, ptr %addr.a, align 4
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, ptr %addr.a, align 4
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, ptr %A, i32 %iv
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, ptr %addr.a, align 4
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
-; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, ptr %B, i32 %iv
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
loop:
%iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
- %addr.a = getelementptr i32, i32* %A, i32 %iv
- %load = load i32, i32* %addr.a
+ %addr.a = getelementptr i32, ptr %A, i32 %iv
+ %load = load i32, ptr %addr.a
%mul = mul i32 %load, %load
- %addr.b = getelementptr i32, i32* %B, i32 %iv
- store i32 %mul, i32* %addr.b
+ %addr.b = getelementptr i32, ptr %B, i32 %iv
+ store i32 %mul, ptr %addr.b
%iv.next = add nuw i32 %iv, 1
%cmp = icmp ne i32 %iv.next, %N
br i1 %cmp, label %loop, label %exit
ret i32 %res
}
-define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) {
+define i32 @simple_mul_ext_lsr_loop(ptr %A, ptr %B, i32 %N) {
; CHECK-T1-SIZE-LABEL: 'simple_mul_ext_lsr_loop'
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
-; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
-; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, ptr %addr.a, align 2
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, ptr %addr.a, align 2
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
-; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, ptr %addr.a, align 2
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
-; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
-; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
-; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
-; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
-; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
-; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
-; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, ptr %addr.a, align 2
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7
-; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, ptr %addr.b, align 4
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
-; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
-; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
loop:
%iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
- %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ]
- %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ]
- %load = load i16, i16* %addr.a
+ %addr.a = phi ptr [ %A, %preheader ], [ %addr.a, %loop ]
+ %addr.b = phi ptr [ %B, %preheader ], [ %addr.b, %loop ]
+ %load = load i16, ptr %addr.a
%sext = sext i16 %load to i32
%mul = mul i32 %sext, 7
- store i32 %mul, i32* %addr.b
+ store i32 %mul, ptr %addr.b
%iv.next = add nuw i32 %iv, 1
- %addr.a.next = getelementptr i16, i16* %addr.a, i32 1
- %addr.b.next = getelementptr i32, i32* %addr.b, i32 1
+ %addr.a.next = getelementptr i16, ptr %addr.a, i32 1
+ %addr.b.next = getelementptr i32, ptr %addr.b, i32 1
%cmp = icmp ne i32 %iv.next, %N
br i1 %cmp, label %loop, label %exit
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
-define void @testi8(i8* %a, i32 %i) {
+define void @testi8(ptr %a, i32 %i) {
; CHECK-V6M-LABEL: 'testi8'
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-NOFP-LABEL: 'testi8'
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-FP-LABEL: 'testi8'
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-MVE-LABEL: 'testi8'
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-MVEFP-LABEL: 'testi8'
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-T32-LABEL: 'testi8'
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-A32-LABEL: 'testi8'
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, ptr %a, i32 31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, ptr %a, i32 32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %a0 = getelementptr inbounds i8, i8* %a, i32 0
- %a1 = getelementptr inbounds i8, i8* %a, i32 1
- %am4 = getelementptr inbounds i8, i8* %a, i32 -1
- %a31 = getelementptr inbounds i8, i8* %a, i32 31
- %a32 = getelementptr inbounds i8, i8* %a, i32 32
- %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
- %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
- %am255 = getelementptr inbounds i8, i8* %a, i32 -255
- %am256 = getelementptr inbounds i8, i8* %a, i32 -256
- %ai = getelementptr inbounds i8, i8* %a, i32 %i
+ %a1 = getelementptr inbounds i8, ptr %a, i32 1
+ %am4 = getelementptr inbounds i8, ptr %a, i32 -1
+ %a31 = getelementptr inbounds i8, ptr %a, i32 31
+ %a32 = getelementptr inbounds i8, ptr %a, i32 32
+ %a4095 = getelementptr inbounds i8, ptr %a, i32 4095
+ %a4096 = getelementptr inbounds i8, ptr %a, i32 4096
+ %am255 = getelementptr inbounds i8, ptr %a, i32 -255
+ %am256 = getelementptr inbounds i8, ptr %a, i32 -256
+ %ai = getelementptr inbounds i8, ptr %a, i32 %i
ret void
}
-define void @testi16(i16* %a, i32 %i) {
+define void @testi16(ptr %a, i32 %i) {
; CHECK-V6M-LABEL: 'testi16'
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-NOFP-LABEL: 'testi16'
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-FP-LABEL: 'testi16'
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-MVE-LABEL: 'testi16'
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-MVEFP-LABEL: 'testi16'
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-T32-LABEL: 'testi16'
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-A32-LABEL: 'testi16'
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, ptr %a, i32 31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, ptr %a, i32 32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %a0 = getelementptr inbounds i16, i16* %a, i32 0
- %a1 = getelementptr inbounds i16, i16* %a, i32 1
- %am4 = getelementptr inbounds i16, i16* %a, i32 -1
- %a31 = getelementptr inbounds i16, i16* %a, i32 31
- %a32 = getelementptr inbounds i16, i16* %a, i32 32
- %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
- %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
- %am255 = getelementptr inbounds i16, i16* %a, i32 -127
- %am256 = getelementptr inbounds i16, i16* %a, i32 -128
- %ai = getelementptr inbounds i16, i16* %a, i32 %i
+ %a1 = getelementptr inbounds i16, ptr %a, i32 1
+ %am4 = getelementptr inbounds i16, ptr %a, i32 -1
+ %a31 = getelementptr inbounds i16, ptr %a, i32 31
+ %a32 = getelementptr inbounds i16, ptr %a, i32 32
+ %a4095 = getelementptr inbounds i16, ptr %a, i32 2046
+ %a4096 = getelementptr inbounds i16, ptr %a, i32 2048
+ %am255 = getelementptr inbounds i16, ptr %a, i32 -127
+ %am256 = getelementptr inbounds i16, ptr %a, i32 -128
+ %ai = getelementptr inbounds i16, ptr %a, i32 %i
ret void
}
-define void @testi32(i32* %a, i32 %i) {
+define void @testi32(ptr %a, i32 %i) {
; CHECK-V6M-LABEL: 'testi32'
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-NOFP-LABEL: 'testi32'
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-FP-LABEL: 'testi32'
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-MVE-LABEL: 'testi32'
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-MVEFP-LABEL: 'testi32'
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-T32-LABEL: 'testi32'
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-A32-LABEL: 'testi32'
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, ptr %a, i32 31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, ptr %a, i32 32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %a0 = getelementptr inbounds i32, i32* %a, i32 0
- %a1 = getelementptr inbounds i32, i32* %a, i32 1
- %am4 = getelementptr inbounds i32, i32* %a, i32 -1
- %a31 = getelementptr inbounds i32, i32* %a, i32 31
- %a32 = getelementptr inbounds i32, i32* %a, i32 32
- %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
- %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
- %am255 = getelementptr inbounds i32, i32* %a, i32 -63
- %am256 = getelementptr inbounds i32, i32* %a, i32 -64
- %ai = getelementptr inbounds i32, i32* %a, i32 %i
+ %a1 = getelementptr inbounds i32, ptr %a, i32 1
+ %am4 = getelementptr inbounds i32, ptr %a, i32 -1
+ %a31 = getelementptr inbounds i32, ptr %a, i32 31
+ %a32 = getelementptr inbounds i32, ptr %a, i32 32
+ %a1023 = getelementptr inbounds i32, ptr %a, i32 1023
+ %a1024 = getelementptr inbounds i32, ptr %a, i32 1024
+ %am255 = getelementptr inbounds i32, ptr %a, i32 -63
+ %am256 = getelementptr inbounds i32, ptr %a, i32 -64
+ %ai = getelementptr inbounds i32, ptr %a, i32 %i
ret void
}
-define void @testi64(i64* %a, i32 %i) {
+define void @testi64(ptr %a, i32 %i) {
; CHECK-V6M-LABEL: 'testi64'
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-NOFP-LABEL: 'testi64'
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-FP-LABEL: 'testi64'
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-MVE-LABEL: 'testi64'
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-MVEFP-LABEL: 'testi64'
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-T32-LABEL: 'testi64'
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-A32-LABEL: 'testi64'
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a15 = getelementptr inbounds i64, ptr %a, i32 15
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a16 = getelementptr inbounds i64, ptr %a, i32 16
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a31 = getelementptr inbounds i64, ptr %a, i32 31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a32 = getelementptr inbounds i64, ptr %a, i32 32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %a0 = getelementptr inbounds i64, i64* %a, i32 0
- %a1 = getelementptr inbounds i64, i64* %a, i32 1
- %am4 = getelementptr inbounds i64, i64* %a, i32 -1
- %a15 = getelementptr inbounds i64, i64* %a, i32 15
- %a16 = getelementptr inbounds i64, i64* %a, i32 16
- %a31 = getelementptr inbounds i64, i64* %a, i32 31
- %a32 = getelementptr inbounds i64, i64* %a, i32 32
- %a4095 = getelementptr inbounds i64, i64* %a, i32 1023
- %a4096 = getelementptr inbounds i64, i64* %a, i32 1024
- %am255 = getelementptr inbounds i64, i64* %a, i32 -63
- %am256 = getelementptr inbounds i64, i64* %a, i32 -64
- %ai = getelementptr inbounds i64, i64* %a, i32 %i
+ %a1 = getelementptr inbounds i64, ptr %a, i32 1
+ %am4 = getelementptr inbounds i64, ptr %a, i32 -1
+ %a15 = getelementptr inbounds i64, ptr %a, i32 15
+ %a16 = getelementptr inbounds i64, ptr %a, i32 16
+ %a31 = getelementptr inbounds i64, ptr %a, i32 31
+ %a32 = getelementptr inbounds i64, ptr %a, i32 32
+ %a4095 = getelementptr inbounds i64, ptr %a, i32 1023
+ %a4096 = getelementptr inbounds i64, ptr %a, i32 1024
+ %am255 = getelementptr inbounds i64, ptr %a, i32 -63
+ %am256 = getelementptr inbounds i64, ptr %a, i32 -64
+ %ai = getelementptr inbounds i64, ptr %a, i32 %i
ret void
}
-define void @testhalf(half* %a, i32 %i) {
+define void @testhalf(ptr %a, i32 %i) {
; CHECK-V6M-LABEL: 'testhalf'
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds half, half* %a, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, half* %a, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, half* %a, i32 -1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, half* %a, i32 255
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, half* %a, i32 256
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, half* %a, i32 -255
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, half* %a, i32 -256
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, half* %a, i32 1023
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, half* %a, i32 1024
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-NOFP-LABEL: 'testhalf'
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds half, half* %a, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds half, half* %a, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds half, half* %a, i32 -1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds half, half* %a, i32 255
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds half, half* %a, i32 256
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, half* %a, i32 -255
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, half* %a, i32 -256
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds half, half* %a, i32 1023
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1024 = getelementptr inbounds half, half* %a, i32 1024
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-FP-LABEL: 'testhalf'
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds half, half* %a, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, half* %a, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, half* %a, i32 -1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, half* %a, i32 255
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds half, half* %a, i32 256
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, half* %a, i32 -255
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds half, half* %a, i32 -256
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, half* %a, i32 1023
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, half* %a, i32 1024
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-MVE-LABEL: 'testhalf'
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds half, half* %a, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds half, half* %a, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds half, half* %a, i32 -1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds half, half* %a, i32 255
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, half* %a, i32 256
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds half, half* %a, i32 -255
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, half* %a, i32 -256
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, half* %a, i32 1023
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, half* %a, i32 1024
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-MVEFP-LABEL: 'testhalf'
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds half, half* %a, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds half, half* %a, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds half, half* %a, i32 -1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds half, half* %a, i32 255
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, half* %a, i32 256
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds half, half* %a, i32 -255
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, half* %a, i32 -256
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, half* %a, i32 1023
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, half* %a, i32 1024
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-T32-LABEL: 'testhalf'
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds half, half* %a, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, half* %a, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, half* %a, i32 -1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, half* %a, i32 255
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds half, half* %a, i32 256
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, half* %a, i32 -255
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds half, half* %a, i32 -256
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, half* %a, i32 1023
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, half* %a, i32 1024
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-A32-LABEL: 'testhalf'
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds half, half* %a, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, half* %a, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, half* %a, i32 -1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, half* %a, i32 255
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, half* %a, i32 256
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, half* %a, i32 -255
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, half* %a, i32 -256
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, half* %a, i32 1023
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, half* %a, i32 1024
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1 = getelementptr inbounds half, ptr %a, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds half, ptr %a, i32 -1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds half, ptr %a, i32 255
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds half, ptr %a, i32 256
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds half, ptr %a, i32 -255
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds half, ptr %a, i32 -256
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, ptr %a, i32 -63
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds half, ptr %a, i32 -64
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, ptr %a, i32 %i
; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %a0 = getelementptr inbounds half, half* %a, i32 0
- %a1 = getelementptr inbounds half, half* %a, i32 1
- %am1 = getelementptr inbounds half, half* %a, i32 -1
- %a255 = getelementptr inbounds half, half* %a, i32 255
- %a256 = getelementptr inbounds half, half* %a, i32 256
- %am255 = getelementptr inbounds half, half* %a, i32 -255
- %am256 = getelementptr inbounds half, half* %a, i32 -256
- %a1023 = getelementptr inbounds half, half* %a, i32 1023
- %a1024 = getelementptr inbounds half, half* %a, i32 1024
- %am63 = getelementptr inbounds half, half* %a, i32 -63
- %am64 = getelementptr inbounds half, half* %a, i32 -64
- %ai = getelementptr inbounds half, half* %a, i32 %i
+ %a1 = getelementptr inbounds half, ptr %a, i32 1
+ %am1 = getelementptr inbounds half, ptr %a, i32 -1
+ %a255 = getelementptr inbounds half, ptr %a, i32 255
+ %a256 = getelementptr inbounds half, ptr %a, i32 256
+ %am255 = getelementptr inbounds half, ptr %a, i32 -255
+ %am256 = getelementptr inbounds half, ptr %a, i32 -256
+ %a1023 = getelementptr inbounds half, ptr %a, i32 1023
+ %a1024 = getelementptr inbounds half, ptr %a, i32 1024
+ %am63 = getelementptr inbounds half, ptr %a, i32 -63
+ %am64 = getelementptr inbounds half, ptr %a, i32 -64
+ %ai = getelementptr inbounds half, ptr %a, i32 %i
ret void
}
-define void @testfloat(float* %a, i32 %i) {
+define void @testfloat(ptr %a, i32 %i) {
; CHECK-V6M-LABEL: 'testfloat'
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, float* %a, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds float, float* %a, i32 -1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds float, float* %a, i32 255
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, float* %a, i32 256
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds float, float* %a, i32 -255
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, float* %a, i32 -256
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, float* %a, i32 1023
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, float* %a, i32 1024
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-NOFP-LABEL: 'testfloat'
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, float* %a, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, float* %a, i32 -1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, float* %a, i32 255
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds float, float* %a, i32 256
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds float, float* %a, i32 -255
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, float* %a, i32 -256
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds float, float* %a, i32 1023
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, float* %a, i32 1024
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-FP-LABEL: 'testfloat'
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, float* %a, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, float* %a, i32 -1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, float* %a, i32 255
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, float* %a, i32 256
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, float* %a, i32 -255
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, float* %a, i32 -256
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, float* %a, i32 1023
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, float* %a, i32 1024
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-MVE-LABEL: 'testfloat'
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, float* %a, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, float* %a, i32 -1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, float* %a, i32 255
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds float, float* %a, i32 256
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds float, float* %a, i32 -255
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, float* %a, i32 -256
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds float, float* %a, i32 1023
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, float* %a, i32 1024
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-MVEFP-LABEL: 'testfloat'
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, float* %a, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, float* %a, i32 -1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, float* %a, i32 255
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, float* %a, i32 256
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, float* %a, i32 -255
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, float* %a, i32 -256
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, float* %a, i32 1023
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, float* %a, i32 1024
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-T32-LABEL: 'testfloat'
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, float* %a, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, float* %a, i32 -1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, float* %a, i32 255
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, float* %a, i32 256
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, float* %a, i32 -255
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, float* %a, i32 -256
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, float* %a, i32 1023
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, float* %a, i32 1024
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-A32-LABEL: 'testfloat'
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, float* %a, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, float* %a, i32 -1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, float* %a, i32 255
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, float* %a, i32 256
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, float* %a, i32 -255
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, float* %a, i32 -256
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, float* %a, i32 1023
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, float* %a, i32 1024
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds float, ptr %a, i32 -1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds float, ptr %a, i32 255
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds float, ptr %a, i32 256
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds float, ptr %a, i32 -255
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds float, ptr %a, i32 -256
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, ptr %a, i32 -63
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, ptr %a, i32 -64
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %a0 = getelementptr inbounds float, float* %a, i32 0
- %a1 = getelementptr inbounds float, float* %a, i32 1
- %am1 = getelementptr inbounds float, float* %a, i32 -1
- %a255 = getelementptr inbounds float, float* %a, i32 255
- %a256 = getelementptr inbounds float, float* %a, i32 256
- %am255 = getelementptr inbounds float, float* %a, i32 -255
- %am256 = getelementptr inbounds float, float* %a, i32 -256
- %a1023 = getelementptr inbounds float, float* %a, i32 1023
- %a1024 = getelementptr inbounds float, float* %a, i32 1024
- %am63 = getelementptr inbounds float, float* %a, i32 -63
- %am64 = getelementptr inbounds float, float* %a, i32 -64
- %ai = getelementptr inbounds float, float* %a, i32 %i
+ %a1 = getelementptr inbounds float, ptr %a, i32 1
+ %am1 = getelementptr inbounds float, ptr %a, i32 -1
+ %a255 = getelementptr inbounds float, ptr %a, i32 255
+ %a256 = getelementptr inbounds float, ptr %a, i32 256
+ %am255 = getelementptr inbounds float, ptr %a, i32 -255
+ %am256 = getelementptr inbounds float, ptr %a, i32 -256
+ %a1023 = getelementptr inbounds float, ptr %a, i32 1023
+ %a1024 = getelementptr inbounds float, ptr %a, i32 1024
+ %am63 = getelementptr inbounds float, ptr %a, i32 -63
+ %am64 = getelementptr inbounds float, ptr %a, i32 -64
+ %ai = getelementptr inbounds float, ptr %a, i32 %i
ret void
}
-define void @testdouble(double* %a, i32 %i) {
+define void @testdouble(ptr %a, i32 %i) {
; CHECK-V6M-LABEL: 'testdouble'
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-NOFP-LABEL: 'testdouble'
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-FP-LABEL: 'testdouble'
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-MVE-LABEL: 'testdouble'
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-MVEFP-LABEL: 'testdouble'
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-T32-LABEL: 'testdouble'
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-A32-LABEL: 'testdouble'
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, ptr %a, i32 -1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, ptr %a, i32 127
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, ptr %a, i32 128
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, ptr %a, i32 -127
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, ptr %a, i32 -128
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, ptr %a, i32 511
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, ptr %a, i32 512
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, ptr %a, i32 -31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, ptr %a, i32 -32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %a0 = getelementptr inbounds double, double* %a, i32 0
- %a1 = getelementptr inbounds double, double* %a, i32 1
- %am1 = getelementptr inbounds double, double* %a, i32 -1
- %a255 = getelementptr inbounds double, double* %a, i32 127
- %a256 = getelementptr inbounds double, double* %a, i32 128
- %am255 = getelementptr inbounds double, double* %a, i32 -127
- %am256 = getelementptr inbounds double, double* %a, i32 -128
- %a1023 = getelementptr inbounds double, double* %a, i32 511
- %a1024 = getelementptr inbounds double, double* %a, i32 512
- %am63 = getelementptr inbounds double, double* %a, i32 -31
- %am64 = getelementptr inbounds double, double* %a, i32 -32
- %ai = getelementptr inbounds double, double* %a, i32 %i
+ %a1 = getelementptr inbounds double, ptr %a, i32 1
+ %am1 = getelementptr inbounds double, ptr %a, i32 -1
+ %a255 = getelementptr inbounds double, ptr %a, i32 127
+ %a256 = getelementptr inbounds double, ptr %a, i32 128
+ %am255 = getelementptr inbounds double, ptr %a, i32 -127
+ %am256 = getelementptr inbounds double, ptr %a, i32 -128
+ %a1023 = getelementptr inbounds double, ptr %a, i32 511
+ %a1024 = getelementptr inbounds double, ptr %a, i32 512
+ %am63 = getelementptr inbounds double, ptr %a, i32 -31
+ %am64 = getelementptr inbounds double, ptr %a, i32 -32
+ %ai = getelementptr inbounds double, ptr %a, i32 %i
ret void
}
define void @testvecs(i32 %i) {
; CHECK-V6M-LABEL: 'testvecs'
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 4
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 4
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 4
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 4
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 4
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 4
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 4
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -31
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -32
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 %i
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
-; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-NOFP-LABEL: 'testvecs'
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 4
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 4
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 4
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 4
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 4
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 4
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 4
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -31
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -32
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 %i
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
-; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-V7M-FP-LABEL: 'testvecs'
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 4
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 4
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 4
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 4
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 4
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 4
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 4
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -31
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -32
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 %i
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
-; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-MVE-LABEL: 'testvecs'
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -31
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -32
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 %i
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-MVEFP-LABEL: 'testvecs'
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 4
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 4
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 4
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 4
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 4
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 4
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 4
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -31
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -32
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 %i
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-T32-LABEL: 'testvecs'
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 4
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 4
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 4
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 4
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 4
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 4
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 4
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -31
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -32
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 %i
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-A32-LABEL: 'testvecs'
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 4
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 4
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 4
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 4
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 4
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 4
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 4
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -31
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -32
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 %i
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, ptr undef, i32 -1
; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
- %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
- %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
- %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
- %a11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 0
- %a12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
- %a13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
- %b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
- %b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
- %b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
- %b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
- %b11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 1
- %b12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
- %b13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
+ %b7 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+ %b8 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+ %b9 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+ %b10 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+ %b11 = getelementptr inbounds <4 x half>, ptr undef, i32 1
+ %b12 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+ %b13 = getelementptr inbounds <4 x double>, ptr undef, i32 1
- %o7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 4
- %o8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 4
- %o9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 4
- %o10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 4
- %o11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 4
- %o12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 4
- %o13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 4
+ %o7 = getelementptr inbounds <4 x i8>, ptr undef, i32 4
+ %o8 = getelementptr inbounds <4 x i16>, ptr undef, i32 4
+ %o9 = getelementptr inbounds <4 x i32>, ptr undef, i32 4
+ %o10 = getelementptr inbounds <4 x i64>, ptr undef, i32 4
+ %o11 = getelementptr inbounds <4 x half>, ptr undef, i32 4
+ %o12 = getelementptr inbounds <4 x float>, ptr undef, i32 4
+ %o13 = getelementptr inbounds <4 x double>, ptr undef, i32 4
- %p7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 31
- %p8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 31
- %p9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 31
- %p10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 31
- %p11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 31
- %p12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 31
- %p13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 31
+ %p7 = getelementptr inbounds <4 x i8>, ptr undef, i32 31
+ %p8 = getelementptr inbounds <4 x i16>, ptr undef, i32 31
+ %p9 = getelementptr inbounds <4 x i32>, ptr undef, i32 31
+ %p10 = getelementptr inbounds <4 x i64>, ptr undef, i32 31
+ %p11 = getelementptr inbounds <4 x half>, ptr undef, i32 31
+ %p12 = getelementptr inbounds <4 x float>, ptr undef, i32 31
+ %p13 = getelementptr inbounds <4 x double>, ptr undef, i32 31
- %q7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 32
- %q8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 32
- %q9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 32
- %q10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 32
- %q11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 32
- %q12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 32
- %q13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 32
+ %q7 = getelementptr inbounds <4 x i8>, ptr undef, i32 32
+ %q8 = getelementptr inbounds <4 x i16>, ptr undef, i32 32
+ %q9 = getelementptr inbounds <4 x i32>, ptr undef, i32 32
+ %q10 = getelementptr inbounds <4 x i64>, ptr undef, i32 32
+ %q11 = getelementptr inbounds <4 x half>, ptr undef, i32 32
+ %q12 = getelementptr inbounds <4 x float>, ptr undef, i32 32
+ %q13 = getelementptr inbounds <4 x double>, ptr undef, i32 32
- %r7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -31
- %r8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -31
- %r9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -31
- %r10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -31
- %r11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -31
- %r12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -31
- %r13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -31
+ %r7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -31
+ %r8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -31
+ %r9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -31
+ %r10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -31
+ %r11 = getelementptr inbounds <4 x half>, ptr undef, i32 -31
+ %r12 = getelementptr inbounds <4 x float>, ptr undef, i32 -31
+ %r13 = getelementptr inbounds <4 x double>, ptr undef, i32 -31
- %s7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 -32
- %s8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 -32
- %s9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 -32
- %s10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 -32
- %s11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 -32
- %s12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 -32
- %s13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 -32
+ %s7 = getelementptr inbounds <4 x i8>, ptr undef, i32 -32
+ %s8 = getelementptr inbounds <4 x i16>, ptr undef, i32 -32
+ %s9 = getelementptr inbounds <4 x i32>, ptr undef, i32 -32
+ %s10 = getelementptr inbounds <4 x i64>, ptr undef, i32 -32
+ %s11 = getelementptr inbounds <4 x half>, ptr undef, i32 -32
+ %s12 = getelementptr inbounds <4 x float>, ptr undef, i32 -32
+ %s13 = getelementptr inbounds <4 x double>, ptr undef, i32 -32
- %c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
- %c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
- %c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
- %c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
- %c11 = getelementptr inbounds <4 x half>, <4 x half>* undef, i32 %i
- %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
- %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
+ %c7 = getelementptr inbounds <4 x i8>, ptr undef, i32 %i
+ %c8 = getelementptr inbounds <4 x i16>, ptr undef, i32 %i
+ %c9 = getelementptr inbounds <4 x i32>, ptr undef, i32 %i
+ %c10 = getelementptr inbounds <4 x i64>, ptr undef, i32 %i
+ %c11 = getelementptr inbounds <4 x half>, ptr undef, i32 %i
+ %c12 = getelementptr inbounds <4 x float>, ptr undef, i32 %i
+ %c13 = getelementptr inbounds <4 x double>, ptr undef, i32 %i
- %d0 = getelementptr inbounds i8, i8* undef, i32 -1
+ %d0 = getelementptr inbounds i8, ptr undef, i32 -1
ret void
}
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, i32* undef, i32 1
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, i32* undef, i32 16
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
-; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, i32* undef, i32 1
-; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, i32* undef, i32 16
+; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1
+; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16
; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
; CHECK-T2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, i32* undef, i32 1
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, i32* undef, i32 16
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
-; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, i32* undef, i32 1
-; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, i32* undef, i32 16
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1
+; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16
; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
; CHECK-T2-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, i32* undef, i32 1
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, i32* undef, i32 16
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %and_1 = and i32 undef, 1
; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_1 = xor i32 undef, 1
; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %xor_7 = xor i32 undef, 7
-; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, i32* undef, i32 1
-; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, i32* undef, i32 16
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_1 = getelementptr i32, ptr undef, i32 1
+; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep_16 = getelementptr i32, ptr undef, i32 16
; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_244 = icmp ne i32 undef, 244
; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_256 = icmp uge i32 undef, 256
; CHECK-T2-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp_1024 = icmp ult i32 undef, 1024
%and_1 = and i32 undef, 1
%xor_1 = xor i32 undef, 1
%xor_7 = xor i32 undef, 7
- %gep_1 = getelementptr i32, i32* undef, i32 1
- %gep_16 = getelementptr i32, i32* undef, i32 16
+ %gep_1 = getelementptr i32, ptr undef, i32 1
+ %gep_16 = getelementptr i32, ptr undef, i32 16
%cmp_244 = icmp ne i32 undef, 244
%cmp_256 = icmp uge i32 undef, 256
%cmp_1024 = icmp ult i32 undef, 1024
; Cost test for inserting a scalar into an <8 x i8> vector: the vector is
; loaded from %vaddr and the i8 scalar from %saddr, the scalar is inserted at
; lane 1, and the result is stored back to %vaddr. The insertelement itself is
; the only instruction whose cost is checked here; it is expected to cost 3.
%T_i8v = type <8 x i8>
%T_i8 = type i8
; CHECK: insertelement_i8
-define void @insertelement_i8(%T_i8* %saddr,
- %T_i8v* %vaddr) {
- %v0 = load %T_i8v, %T_i8v* %vaddr
- %v1 = load %T_i8, %T_i8* %saddr
+define void @insertelement_i8(ptr %saddr,
+ ptr %vaddr) {
+ %v0 = load %T_i8v, ptr %vaddr
+ %v1 = load %T_i8, ptr %saddr
;CHECK: estimated cost of 3 for {{.*}} insertelement <8 x i8>
%v2 = insertelement %T_i8v %v0, %T_i8 %v1, i32 1
- store %T_i8v %v2, %T_i8v* %vaddr
+ store %T_i8v %v2, ptr %vaddr
ret void
}
; Cost test for inserting a scalar into a <4 x i16> vector: the vector is
; loaded from %vaddr and the i16 scalar from %saddr, the scalar is inserted at
; lane 1, and the result is stored back to %vaddr. The insertelement itself is
; the only instruction whose cost is checked here; it is expected to cost 3.
%T_i16v = type <4 x i16>
%T_i16 = type i16
; CHECK: insertelement_i16
-define void @insertelement_i16(%T_i16* %saddr,
- %T_i16v* %vaddr) {
- %v0 = load %T_i16v, %T_i16v* %vaddr
- %v1 = load %T_i16, %T_i16* %saddr
+define void @insertelement_i16(ptr %saddr,
+ ptr %vaddr) {
+ %v0 = load %T_i16v, ptr %vaddr
+ %v1 = load %T_i16, ptr %saddr
;CHECK: estimated cost of 3 for {{.*}} insertelement <4 x i16>
%v2 = insertelement %T_i16v %v0, %T_i16 %v1, i32 1
- store %T_i16v %v2, %T_i16v* %vaddr
+ store %T_i16v %v2, ptr %vaddr
ret void
}
; Cost test for inserting a scalar into a <2 x i32> vector: the vector is
; loaded from %vaddr and the i32 scalar from %saddr, the scalar is inserted at
; lane 1, and the result is stored back to %vaddr. The insertelement itself is
; the only instruction whose cost is checked here; it is expected to cost 3.
%T_i32v = type <2 x i32>
%T_i32 = type i32
; CHECK: insertelement_i32
-define void @insertelement_i32(%T_i32* %saddr,
- %T_i32v* %vaddr) {
- %v0 = load %T_i32v, %T_i32v* %vaddr
- %v1 = load %T_i32, %T_i32* %saddr
+define void @insertelement_i32(ptr %saddr,
+ ptr %vaddr) {
+ %v0 = load %T_i32v, ptr %vaddr
+ %v1 = load %T_i32, ptr %saddr
;CHECK: estimated cost of 3 for {{.*}} insertelement <2 x i32>
%v2 = insertelement %T_i32v %v0, %T_i32 %v1, i32 1
- store %T_i32v %v2, %T_i32v* %vaddr
+ store %T_i32v %v2, ptr %vaddr
ret void
}
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
-declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
-declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>)
+declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>)
declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)
define void @smax(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
; THRU-LABEL: 'smax'
ret void
}
; Cost of a single llvm.masked.gather call returning <16 x float>; the function
; arguments %va, %vb and %vc are forwarded unchanged as the call's vector
; operands and the result %v is not used. All four check prefixes (THRU, LATE,
; SIZE, SIZE_LATE) expect the call to cost 96; the ret is expected to cost 0
; under THRU and 1 under the other three.
-define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) {
+define void @maskedgather(<16 x ptr> %va, <16 x i1> %vb, <16 x float> %vc) {
; THRU-LABEL: 'maskedgather'
-; THRU-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+; THRU-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'maskedgather'
-; LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+; LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'maskedgather'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'maskedgather'
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+ %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
ret void
}
; Cost of a single llvm.masked.scatter call on a <16 x float> value; the
; function arguments %va, %vb and %vc are forwarded unchanged as the call's
; vector operands. All four check prefixes (THRU, LATE, SIZE, SIZE_LATE) expect
; the call to cost 96; the ret is expected to cost 0 under THRU and 1 under the
; other three.
-define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) {
+define void @maskedscatter(<16 x float> %va, <16 x ptr> %vb, <16 x i1> %vc) {
; THRU-LABEL: 'maskedscatter'
-; THRU-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
+; THRU-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'maskedscatter'
-; LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
+; LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'maskedscatter'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'maskedscatter'
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
+ call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
ret void
}
ret void
}
; Cost of a single llvm.memcpy call on %a and %b (both marked align 1) with the
; constant 32 as its i32 operand and i1 false as its last operand. The %c
; parameter is not referenced by the body. All four check prefixes (THRU, LATE,
; SIZE, SIZE_LATE) expect the call to cost 4; the ret is expected to cost 0
; under THRU and 1 under the other three.
-define void @memcpy(i8* %a, i8* %b, i32 %c) {
+define void @memcpy(ptr %a, ptr %b, i32 %c) {
; THRU-LABEL: 'memcpy'
-; THRU-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
+; THRU-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'memcpy'
-; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
+; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'memcpy'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'memcpy'
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
ret void
}
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=armv8r-none-eabi < %s | FileCheck %s --check-prefix=CHECK
; Check that the cost is 1 when an unusually wide (i128) load is truncated to a register-sized value.
; Loads an i128 through %ptr, truncates it to i32 and returns the truncated
; value. The load, the trunc and the ret are each expected to cost 1.
; NOTE(review): the source load carries no explicit alignment, while the
; expected output prints "align 4" -- presumably filled in from the target's
; data layout; confirm before changing the triple.
-define i32 @loadUnusualIntegerWithTrunc(i128* %ptr) {
+define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, i128* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc = trunc i128 %out to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
%trunc = trunc i128 %out to i32
ret i32 %trunc
}
; Loads an i128 through %ptr and returns it unmodified (no truncation). The
; load and the ret are each expected to cost 1.
-define i128 @loadUnusualInteger(i128* %ptr) {
+define i128 @loadUnusualInteger(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualInteger'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, i128* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
ret i128 %out
}
define void @stores() {
; CHECK-NOVEC-LABEL: 'stores'
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, i64* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, i128* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store double undef, double* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 2
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 2
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store double undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> undef, ptr undef, align 2
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, ptr undef, align 2
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> undef, ptr undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x double> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, ptr undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, ptr undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, ptr undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, ptr undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, ptr undef, align 1
; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-FP-LABEL: 'stores'
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, i64* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, i128* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 2
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 2
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i8> undef, ptr undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i16> undef, ptr undef, align 2
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, ptr undef, align 2
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <16 x i8> undef, ptr undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x double> undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i64> undef, ptr undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> undef, ptr undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> undef, ptr undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> undef, ptr undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, ptr undef, align 1
; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-MVE-LABEL: 'stores'
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, i64* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, i128* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 2
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 2
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> undef, ptr undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> undef, ptr undef, align 2
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i32> undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i64> undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> undef, ptr undef, align 2
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> undef, ptr undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x double> undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x float> undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i64> undef, ptr undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> undef, ptr undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> undef, ptr undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> undef, ptr undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, ptr undef, align 1
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-NEON-LABEL: 'stores'
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, i64* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, i128* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 2
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 2
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i8> undef, ptr undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <2 x i16> undef, ptr undef, align 2
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 2
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x double> undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, ptr undef, align 1
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-V8-SIZE-LABEL: 'stores'
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i128 undef, i128* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 2
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 2
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i128 undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 2
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 2
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1
; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-MVE-SIZE-LABEL: 'stores'
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i128 undef, i128* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i128 undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- store i8 undef, i8* undef, align 4
- store i16 undef, i16* undef, align 4
- store i32 undef, i32* undef, align 4
- store i64 undef, i64* undef, align 4
- store i128 undef, i128* undef, align 4
- store float undef, float* undef, align 4
- store double undef, double* undef, align 4
+ store i8 undef, ptr undef, align 4
+ store i16 undef, ptr undef, align 4
+ store i32 undef, ptr undef, align 4
+ store i64 undef, ptr undef, align 4
+ store i128 undef, ptr undef, align 4
+ store float undef, ptr undef, align 4
+ store double undef, ptr undef, align 4
- store <2 x i8> undef, <2 x i8>* undef, align 1
- store <2 x i16> undef, <2 x i16>* undef, align 2
- store <2 x i32> undef, <2 x i32>* undef, align 4
- store <2 x i64> undef, <2 x i64>* undef, align 4
- store <4 x i32> undef, <4 x i32>* undef, align 4
- store <8 x i16> undef, <8 x i16>* undef, align 2
- store <16 x i8> undef, <16 x i8>* undef, align 1
+ store <2 x i8> undef, ptr undef, align 1
+ store <2 x i16> undef, ptr undef, align 2
+ store <2 x i32> undef, ptr undef, align 4
+ store <2 x i64> undef, ptr undef, align 4
+ store <4 x i32> undef, ptr undef, align 4
+ store <8 x i16> undef, ptr undef, align 2
+ store <16 x i8> undef, ptr undef, align 1
- store <4 x float> undef, <4 x float>* undef, align 4
- store <4 x double> undef, <4 x double>* undef, align 4
- store <2 x float> undef, <2 x float>* undef, align 4
- store <2 x double> undef, <2 x double>* undef, align 4
+ store <4 x float> undef, ptr undef, align 4
+ store <4 x double> undef, ptr undef, align 4
+ store <2 x float> undef, ptr undef, align 4
+ store <2 x double> undef, ptr undef, align 4
- store <2 x i64> undef, <2 x i64>* undef, align 1
- store <4 x i32> undef, <4 x i32>* undef, align 1
- store <8 x i16> undef, <8 x i16>* undef, align 1
- store <4 x float> undef, <4 x float>* undef, align 1
- store <2 x double> undef, <2 x double>* undef, align 1
+ store <2 x i64> undef, ptr undef, align 1
+ store <4 x i32> undef, ptr undef, align 1
+ store <8 x i16> undef, ptr undef, align 1
+ store <4 x float> undef, ptr undef, align 1
+ store <2 x double> undef, ptr undef, align 1
ret void
}
define void @loads() {
; CHECK-NOVEC-LABEL: 'loads'
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, i64* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, i128* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, float* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load double, double* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <2 x i8>, <2 x i8>* undef, align 1
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <2 x i16>, <2 x i16>* undef, align 2
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <2 x i32>, <2 x i32>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <2 x i64>, <2 x i64>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = load <4 x i32>, <4 x i32>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = load <8 x i16>, <8 x i16>* undef, align 2
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %14 = load <16 x i8>, <16 x i8>* undef, align 1
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %15 = load <4 x float>, <4 x float>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %16 = load <4 x double>, <4 x double>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <2 x float>, <2 x float>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = load <2 x double>, <2 x double>* undef, align 4
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = load <2 x i64>, <2 x i64>* undef, align 1
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <4 x i32>, <4 x i32>* undef, align 1
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1
-; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load double, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <2 x i8>, ptr undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <2 x i16>, ptr undef, align 2
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <2 x i32>, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <2 x i64>, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = load <4 x i32>, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = load <8 x i16>, ptr undef, align 2
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %14 = load <16 x i8>, ptr undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %15 = load <4 x float>, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %16 = load <4 x double>, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <2 x float>, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = load <2 x double>, ptr undef, align 4
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = load <2 x i64>, ptr undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <4 x i32>, ptr undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = load <8 x i16>, ptr undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <4 x float>, ptr undef, align 1
+; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x double>, ptr undef, align 1
; CHECK-NOVEC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-FP-LABEL: 'loads'
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, i64* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, i128* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, float* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, double* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <2 x i8>, <2 x i8>* undef, align 1
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <2 x i16>, <2 x i16>* undef, align 2
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <2 x i32>, <2 x i32>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <2 x i64>, <2 x i64>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = load <4 x i32>, <4 x i32>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = load <8 x i16>, <8 x i16>* undef, align 2
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %14 = load <16 x i8>, <16 x i8>* undef, align 1
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %15 = load <4 x float>, <4 x float>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <4 x double>, <4 x double>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <2 x float>, <2 x float>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <2 x double>, <2 x double>* undef, align 4
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = load <2 x i64>, <2 x i64>* undef, align 1
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <4 x i32>, <4 x i32>* undef, align 1
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1
-; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <2 x i8>, ptr undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <2 x i16>, ptr undef, align 2
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <2 x i32>, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = load <2 x i64>, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = load <4 x i32>, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %13 = load <8 x i16>, ptr undef, align 2
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %14 = load <16 x i8>, ptr undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %15 = load <4 x float>, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <4 x double>, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <2 x float>, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <2 x double>, ptr undef, align 4
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = load <2 x i64>, ptr undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = load <4 x i32>, ptr undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = load <8 x i16>, ptr undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <4 x float>, ptr undef, align 1
+; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <2 x double>, ptr undef, align 1
; CHECK-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-MVE-LABEL: 'loads'
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, i64* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, i128* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, float* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, double* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %8 = load <2 x i8>, <2 x i8>* undef, align 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %9 = load <2 x i16>, <2 x i16>* undef, align 2
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %10 = load <2 x i32>, <2 x i32>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = load <2 x i64>, <2 x i64>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = load <4 x i32>, <4 x i32>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = load <8 x i16>, <8 x i16>* undef, align 2
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <16 x i8>, <16 x i8>* undef, align 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <4 x float>, <4 x float>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <4 x double>, <4 x double>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %17 = load <2 x float>, <2 x float>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <2 x double>, <2 x double>* undef, align 4
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <2 x i64>, <2 x i64>* undef, align 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = load <4 x i32>, <4 x i32>* undef, align 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %8 = load <2 x i8>, ptr undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %9 = load <2 x i16>, ptr undef, align 2
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %10 = load <2 x i32>, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = load <2 x i64>, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = load <4 x i32>, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = load <8 x i16>, ptr undef, align 2
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <16 x i8>, ptr undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <4 x float>, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <4 x double>, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %17 = load <2 x float>, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <2 x double>, ptr undef, align 4
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <2 x i64>, ptr undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = load <4 x i32>, ptr undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = load <8 x i16>, ptr undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <4 x float>, ptr undef, align 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <2 x double>, ptr undef, align 1
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-NEON-LABEL: 'loads'
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, i64* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, i128* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, float* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, double* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, <2 x i8>* undef, align 1
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, <2 x i16>* undef, align 2
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, <2 x i32>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, <2 x i64>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, <4 x i32>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <8 x i16>, <8 x i16>* undef, align 2
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <16 x i8>, <16 x i8>* undef, align 1
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x float>, <4 x float>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %16 = load <4 x double>, <4 x double>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x float>, <2 x float>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = load <2 x double>, <2 x double>* undef, align 4
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x i64>, <2 x i64>* undef, align 1
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, <4 x i32>* undef, align 1
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1
-; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load i64, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, ptr undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, ptr undef, align 2
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <8 x i16>, ptr undef, align 2
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <16 x i8>, ptr undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x float>, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %16 = load <4 x double>, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x float>, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %18 = load <2 x double>, ptr undef, align 4
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x i64>, ptr undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, ptr undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, ptr undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, ptr undef, align 1
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x double>, ptr undef, align 1
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-V8-SIZE-LABEL: 'loads'
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = load i128, i128* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, float* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, double* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, <2 x i8>* undef, align 1
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, <2 x i16>* undef, align 2
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, <2 x i32>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, <2 x i64>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, <4 x i32>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <8 x i16>, <8 x i16>* undef, align 2
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <16 x i8>, <16 x i8>* undef, align 1
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x float>, <4 x float>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x double>, <4 x double>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x float>, <2 x float>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x double>, <2 x double>* undef, align 4
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x i64>, <2 x i64>* undef, align 1
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, <4 x i32>* undef, align 1
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1
-; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = load i128, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, ptr undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, ptr undef, align 2
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <8 x i16>, ptr undef, align 2
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <16 x i8>, ptr undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x float>, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x double>, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x float>, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x double>, ptr undef, align 4
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x i64>, ptr undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, ptr undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, ptr undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, ptr undef, align 1
+; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <2 x double>, ptr undef, align 1
; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-MVE-SIZE-LABEL: 'loads'
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = load i128, i128* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, float* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, double* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, <2 x i8>* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, <2 x i16>* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, <2 x i32>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, <2 x i64>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, <4 x i32>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <8 x i16>, <8 x i16>* undef, align 2
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <16 x i8>, <16 x i8>* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x float>, <4 x float>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x double>, <4 x double>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x float>, <2 x float>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x double>, <2 x double>* undef, align 4
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x i64>, <2 x i64>* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, <4 x i32>* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1
-; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = load i128, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <4 x i32>, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <8 x i16>, ptr undef, align 2
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <16 x i8>, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x float>, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x double>, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <2 x float>, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <2 x double>, ptr undef, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x i64>, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <4 x i32>, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, ptr undef, align 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <2 x double>, ptr undef, align 1
; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- load i8, i8* undef, align 4
- load i16, i16* undef, align 4
- load i32, i32* undef, align 4
- load i64, i64* undef, align 4
- load i128, i128* undef, align 4
- load float, float* undef, align 4
- load double, double* undef, align 4
+ load i8, ptr undef, align 4
+ load i16, ptr undef, align 4
+ load i32, ptr undef, align 4
+ load i64, ptr undef, align 4
+ load i128, ptr undef, align 4
+ load float, ptr undef, align 4
+ load double, ptr undef, align 4
- load <2 x i8>, <2 x i8>* undef, align 1
- load <2 x i16>, <2 x i16>* undef, align 2
- load <2 x i32>, <2 x i32>* undef, align 4
- load <2 x i64>, <2 x i64>* undef, align 4
- load <4 x i32>, <4 x i32>* undef, align 4
- load <8 x i16>, <8 x i16>* undef, align 2
- load <16 x i8>, <16 x i8>* undef, align 1
+ load <2 x i8>, ptr undef, align 1
+ load <2 x i16>, ptr undef, align 2
+ load <2 x i32>, ptr undef, align 4
+ load <2 x i64>, ptr undef, align 4
+ load <4 x i32>, ptr undef, align 4
+ load <8 x i16>, ptr undef, align 2
+ load <16 x i8>, ptr undef, align 1
- load <4 x float>, <4 x float>* undef, align 4
- load <4 x double>, <4 x double>* undef, align 4
- load <2 x float>, <2 x float>* undef, align 4
- load <2 x double>, <2 x double>* undef, align 4
+ load <4 x float>, ptr undef, align 4
+ load <4 x double>, ptr undef, align 4
+ load <2 x float>, ptr undef, align 4
+ load <2 x double>, ptr undef, align 4
- load <2 x i64>, <2 x i64>* undef, align 1
- load <4 x i32>, <4 x i32>* undef, align 1
- load <8 x i16>, <8 x i16>* undef, align 1
- load <4 x float>, <4 x float>* undef, align 1
- load <2 x double>, <2 x double>* undef, align 1
+ load <2 x i64>, ptr undef, align 1
+ load <4 x i32>, ptr undef, align 1
+ load <8 x i16>, ptr undef, align 1
+ load <4 x float>, ptr undef, align 1
+ load <2 x double>, ptr undef, align 1
ret void
}
; Align 1, 1
;;;;;;;;;;;;
-define void @memcpy_1(i8* %d, i8* %s) {
+define void @memcpy_1(ptr %d, ptr %s) {
;
; with/without strict-align:
;
; strb r1, [r0]
;
; COMMON-LABEL: 'memcpy_1'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 1, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 1, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 1, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 1, i1 false)
ret void
}
; Cost-model test: 2-byte llvm.memcpy, dst align 1 / src align 1.
; Expected call cost: 2 under CHECK-NO-SA, 4 under CHECK-SA; ret costs 1 under both.
-define void @memcpy_2(i8* %d, i8* %s) {
+define void @memcpy_2(ptr %d, ptr %s) {
;
; no strict-align:
;
; strb r2, [r0]
;
; CHECK-NO-SA-LABEL: 'memcpy_2'
-; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 2, i1 false)
+; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 2, i1 false)
; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-SA-LABEL: 'memcpy_2'
-; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 2, i1 false)
+; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 2, i1 false)
; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 2, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 2, i1 false)
ret void
}
; Cost-model test: 3-byte llvm.memcpy, dst align 1 / src align 1.
; Expected call cost: 4 under CHECK-NO-SA, 6 under CHECK-SA; ret costs 1 under both.
-define void @memcpy_3(i8* %d, i8* %s) {
+define void @memcpy_3(ptr %d, ptr %s) {
;
; no strict-align:
;
; strb r2, [r0]
;
; CHECK-NO-SA-LABEL: 'memcpy_3'
-; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 3, i1 false)
+; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 3, i1 false)
; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-SA-LABEL: 'memcpy_3'
-; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 3, i1 false)
+; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 3, i1 false)
; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 3, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 3, i1 false)
ret void
}
; Cost-model test: 4-byte llvm.memcpy, dst align 1 / src align 1.
; Expected call cost: 2 under CHECK-NO-SA, 8 under CHECK-SA; ret costs 1 under both.
-define void @memcpy_4(i8* %d, i8* %s) {
+define void @memcpy_4(ptr %d, ptr %s) {
;
; no strict-align:
;
; strb.w r12, [r0]
;
; CHECK-NO-SA-LABEL: 'memcpy_4'
-; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 4, i1 false)
+; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 4, i1 false)
; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-SA-LABEL: 'memcpy_4'
-; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 4, i1 false)
+; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 4, i1 false)
; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 4, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 4, i1 false)
ret void
}
; Cost-model test: 8-byte llvm.memcpy, dst align 1 / src align 1.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
-define void @memcpy_8(i8* %d, i8* %s) {
+define void @memcpy_8(ptr %d, ptr %s) {
;
; no strict-align:
;
; pop {r7, pc}
;
; COMMON-LABEL: 'memcpy_8'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 8, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 8, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 8, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 8, i1 false)
ret void
}
; Cost-model test: 16-byte llvm.memcpy, dst align 1 / src align 1.
; Expected call cost: 8 under CHECK-NO-SA, 4 under CHECK-SA; ret costs 1 under both.
-define void @memcpy_16(i8* %d, i8* %s) {
+define void @memcpy_16(ptr %d, ptr %s) {
;
; no strict-align:
;
; pop {r7, pc}
;
; CHECK-NO-SA-LABEL: 'memcpy_16'
-; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 16, i1 false)
+; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 16, i1 false)
; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-SA-LABEL: 'memcpy_16'
-; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 16, i1 false)
+; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 16, i1 false)
; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 16, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 16, i1 false)
ret void
}
; Cost-model test: 32-byte llvm.memcpy, dst align 1 / src align 1.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
; The %N parameter is declared but the call uses the constant length 32.
-define void @memcpy_32(i8* %d, i8* %s, i32 %N) {
+define void @memcpy_32(ptr %d, ptr %s, i32 %N) {
;
; with/without strict-align:
;
; bl __aeabi_memcpy
;
; COMMON-LABEL: 'memcpy_32'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 32, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 32, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 32, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 32, i1 false)
ret void
}
; Cost-model test: llvm.memcpy with a run-time length (%N), dst align 1 / src align 1.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
-define void @memcpy_N(i8* %d, i8* %s, i32 %N) {
+define void @memcpy_N(ptr %d, ptr %s, i32 %N) {
;
; with/without strict-align:
;
; bl __aeabi_memcpy
;
; COMMON-LABEL: 'memcpy_N'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 %N, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 %N, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 %N, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 1 %s, i32 %N, i1 false)
ret void
}
; Align 2, 2
;;;;;;;;;;;;;
; Cost-model test: 1-byte llvm.memcpy, dst align 2 / src align 2.
; A single COMMON check block expects cost 2 for the call and 1 for ret.
-define void @memcpy_1_al2(i8* %d, i8* %s) {
+define void @memcpy_1_al2(ptr %d, ptr %s) {
;
; with/without strict-align:
;
; strb r1, [r0]
;
; COMMON-LABEL: 'memcpy_1_al2'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 1, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 1, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 1, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 1, i1 false)
ret void
}
; Cost-model test: 2-byte llvm.memcpy, dst align 2 / src align 2.
; A single COMMON check block expects cost 2 for the call and 1 for ret.
-define void @memcpy_2_al2(i8* %d, i8* %s) {
+define void @memcpy_2_al2(ptr %d, ptr %s) {
;
; with/without strict-align:
;
; strh r1, [r0]
;
; COMMON-LABEL: 'memcpy_2_al2'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 2, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 2, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 2, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 2, i1 false)
ret void
}
; Cost-model test: 3-byte llvm.memcpy, dst align 2 / src align 2.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
-define void @memcpy_3_al2(i8* %d, i8* %s) {
+define void @memcpy_3_al2(ptr %d, ptr %s) {
;
; with/without strict-align:
;
; strh r1, [r0]
;
; COMMON-LABEL: 'memcpy_3_al2'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 3, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 3, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 3, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 3, i1 false)
ret void
}
; Cost-model test: 4-byte llvm.memcpy, dst align 2 / src align 2.
; Expected call cost: 2 under CHECK-NO-SA, 4 under CHECK-SA; ret costs 1 under both.
-define void @memcpy_4_al2(i8* %d, i8* %s) {
+define void @memcpy_4_al2(ptr %d, ptr %s) {
;
; no strict-align:
;
; strh r1, [r0]
;
; CHECK-NO-SA-LABEL: 'memcpy_4_al2'
-; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 4, i1 false)
+; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 4, i1 false)
; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-SA-LABEL: 'memcpy_4_al2'
-; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 4, i1 false)
+; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 4, i1 false)
; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 4, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 4, i1 false)
ret void
}
; Cost-model test: 8-byte llvm.memcpy, dst align 2 / src align 2.
; Expected call cost: 4 under CHECK-NO-SA, 8 under CHECK-SA; ret costs 1 under both.
-define void @memcpy_8_al2(i8* %d, i8* %s) {
+define void @memcpy_8_al2(ptr %d, ptr %s) {
;
; no strict-align:
;
; strh r1, [r0]
;
; CHECK-NO-SA-LABEL: 'memcpy_8_al2'
-; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 8, i1 false)
+; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 8, i1 false)
; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-SA-LABEL: 'memcpy_8_al2'
-; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 8, i1 false)
+; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 8, i1 false)
; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 8, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 8, i1 false)
ret void
}
; Cost-model test: 16-byte llvm.memcpy, dst align 2 / src align 2.
; Expected call cost: 8 under CHECK-NO-SA, 4 under CHECK-SA; ret costs 1 under both.
-define void @memcpy_16_al2(i8* %d, i8* %s) {
+define void @memcpy_16_al2(ptr %d, ptr %s) {
;
; no strict-align:
;
; bl __aeabi_memcpy
;
; CHECK-NO-SA-LABEL: 'memcpy_16_al2'
-; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 16, i1 false)
+; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 16, i1 false)
; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-SA-LABEL: 'memcpy_16_al2'
-; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 16, i1 false)
+; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 16, i1 false)
; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 16, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 16, i1 false)
ret void
}
; Cost-model test: 32-byte llvm.memcpy, dst align 2 / src align 2.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
; The %N parameter is declared but the call uses the constant length 32.
-define void @memcpy_32_al2(i8* %d, i8* %s, i32 %N) {
+define void @memcpy_32_al2(ptr %d, ptr %s, i32 %N) {
;
; with/without strict-align:
;
; bl __aeabi_memcpy
;
; COMMON-LABEL: 'memcpy_32_al2'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 32, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 32, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 32, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 32, i1 false)
ret void
}
; Cost-model test: llvm.memcpy with a run-time length (%N), dst align 2 / src align 2.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
-define void @memcpy_N_al2(i8* %d, i8* %s, i32 %N) {
+define void @memcpy_N_al2(ptr %d, ptr %s, i32 %N) {
;
; with/without strict-align:
;
; bl __aeabi_memcpy
;
; COMMON-LABEL: 'memcpy_N_al2'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 %N, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 %N, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 %N, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 2 %d, ptr align 2 %s, i32 %N, i1 false)
ret void
}
; Align 4, 4
;;;;;;;;;;;;;
; Cost-model test: 1-byte llvm.memcpy, dst align 4 / src align 4.
; A single COMMON check block expects cost 2 for the call and 1 for ret.
-define void @memcpy_1_al4(i8* %d, i8* %s) {
+define void @memcpy_1_al4(ptr %d, ptr %s) {
;
; with/without strict-align:
;
; strb r1, [r0]
;
; COMMON-LABEL: 'memcpy_1_al4'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 1, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 1, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 1, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 1, i1 false)
ret void
}
; Cost-model test: 2-byte llvm.memcpy, dst align 4 / src align 4.
; A single COMMON check block expects cost 2 for the call and 1 for ret.
-define void @memcpy_2_al4(i8* %d, i8* %s) {
+define void @memcpy_2_al4(ptr %d, ptr %s) {
;
; with/without strict-align:
;
; strh r1, [r0]
;
; COMMON-LABEL: 'memcpy_2_al4'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 2, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 2, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 2, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 2, i1 false)
ret void
}
; Cost-model test: 3-byte llvm.memcpy, dst align 4 / src align 4.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
-define void @memcpy_3_al4(i8* %d, i8* %s) {
+define void @memcpy_3_al4(ptr %d, ptr %s) {
;
; with/without strict-align:
;
; strh r1, [r0]
;
; COMMON-LABEL: 'memcpy_3_al4'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 3, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 3, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 3, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 3, i1 false)
ret void
}
; Cost-model test: 4-byte llvm.memcpy, dst align 4 / src align 4.
; A single COMMON check block expects cost 2 for the call and 1 for ret.
-define void @memcpy_4_al4(i8* %d, i8* %s) {
+define void @memcpy_4_al4(ptr %d, ptr %s) {
;
; with/without strict-align:
;
; str r1, [r0]
;
; COMMON-LABEL: 'memcpy_4_al4'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 4, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 4, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 4, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 4, i1 false)
ret void
}
; Cost-model test: 8-byte llvm.memcpy, dst align 4 / src align 4.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
-define void @memcpy_8_al4(i8* %d, i8* %s) {
+define void @memcpy_8_al4(ptr %d, ptr %s) {
;
; with/without strict-align:
;
; strd r2, r1, [r0]
;
; COMMON-LABEL: 'memcpy_8_al4'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 8, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 8, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 8, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 8, i1 false)
ret void
}
; Cost-model test: 16-byte llvm.memcpy, dst align 4 / src align 4.
; A single COMMON check block expects cost 8 for the call and 1 for ret.
-define void @memcpy_16_al4(i8* %d, i8* %s) {
+define void @memcpy_16_al4(ptr %d, ptr %s) {
;
; with/without strict-align:
;
; str r1, [r0, #12]
;
; COMMON-LABEL: 'memcpy_16_al4'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 16, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 16, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 16, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 16, i1 false)
ret void
}
; Cost-model test: 32-byte llvm.memcpy, dst align 4 / src align 4.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
; The %N parameter is declared but the call uses the constant length 32.
-define void @memcpy_32_al4(i8* %d, i8* %s, i32 %N) {
+define void @memcpy_32_al4(ptr %d, ptr %s, i32 %N) {
;
; with/without strict-align:
;
; stm.w r0, {r2, r3, r12, lr}
;
; COMMON-LABEL: 'memcpy_32_al4'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 32, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 32, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 32, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 32, i1 false)
ret void
}
; Cost-model test: llvm.memcpy with a run-time length (%N), dst align 4 / src align 4.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
-define void @memcpy_N_al4(i8* %d, i8* %s, i32 %N) {
+define void @memcpy_N_al4(ptr %d, ptr %s, i32 %N) {
;
; with/without strict-align:
;
; bl __aeabi_memcpy4
;
; COMMON-LABEL: 'memcpy_N_al4'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 %N, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 %N, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 %N, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 4 %s, i32 %N, i1 false)
ret void
}
; Align 1, 4
;;;;;;;;;;;;;
; Cost-model test: 1-byte llvm.memcpy, dst align 1 / src align 4.
; A single COMMON check block expects cost 2 for the call and 1 for ret.
-define void @memcpy_1_al14(i8* %d, i8* %s) {
+define void @memcpy_1_al14(ptr %d, ptr %s) {
;
; with/without strict-align:
;
; strb r1, [r0]
;
; COMMON-LABEL: 'memcpy_1_al14'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 1, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 1, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 1, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 1, i1 false)
ret void
}
; Cost-model test: 2-byte llvm.memcpy, dst align 1 / src align 4.
; Expected call cost: 2 under CHECK-NO-SA, 4 under CHECK-SA; ret costs 1 under both.
-define void @memcpy_2_al14(i8* %d, i8* %s) {
+define void @memcpy_2_al14(ptr %d, ptr %s) {
;
; no strict-align:
;
; strb r2, [r0]
;
; CHECK-NO-SA-LABEL: 'memcpy_2_al14'
-; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 2, i1 false)
+; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 2, i1 false)
; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-SA-LABEL: 'memcpy_2_al14'
-; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 2, i1 false)
+; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 2, i1 false)
; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 2, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 2, i1 false)
ret void
}
; Cost-model test: 3-byte llvm.memcpy, dst align 1 / src align 4.
; Expected call cost: 4 under CHECK-NO-SA, 6 under CHECK-SA; ret costs 1 under both.
-define void @memcpy_3_al14(i8* %d, i8* %s) {
+define void @memcpy_3_al14(ptr %d, ptr %s) {
;
; no strict-align:
;
; strb r2, [r0]
;
; CHECK-NO-SA-LABEL: 'memcpy_3_al14'
-; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 3, i1 false)
+; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 3, i1 false)
; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-SA-LABEL: 'memcpy_3_al14'
-; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 3, i1 false)
+; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 3, i1 false)
; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 3, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 3, i1 false)
ret void
}
; Cost-model test: 4-byte llvm.memcpy, dst align 1 / src align 4.
; Expected call cost: 2 under CHECK-NO-SA, 8 under CHECK-SA; ret costs 1 under both.
-define void @memcpy_4_al14(i8* %d, i8* %s) {
+define void @memcpy_4_al14(ptr %d, ptr %s) {
;
; no strict-align:
;
; strb.w r12, [r0]
;
; CHECK-NO-SA-LABEL: 'memcpy_4_al14'
-; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 4, i1 false)
+; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 4, i1 false)
; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-SA-LABEL: 'memcpy_4_al14'
-; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 4, i1 false)
+; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 4, i1 false)
; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 4, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 4, i1 false)
ret void
}
; Cost-model test: 8-byte llvm.memcpy, dst align 1 / src align 4.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
-define void @memcpy_8_al14(i8* %d, i8* %s) {
+define void @memcpy_8_al14(ptr %d, ptr %s) {
;
; no strict-align:
;
; pop {r7, pc}
;
; COMMON-LABEL: 'memcpy_8_al14'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 8, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 8, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 8, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 8, i1 false)
ret void
}
; Cost-model test: 16-byte llvm.memcpy, dst align 1 / src align 4.
; Expected call cost: 8 under CHECK-NO-SA, 4 under CHECK-SA; ret costs 1 under both.
-define void @memcpy_16_al14(i8* %d, i8* %s) {
+define void @memcpy_16_al14(ptr %d, ptr %s) {
;
; no strict-align:
;
; bl __aeabi_memcpy
;
; CHECK-NO-SA-LABEL: 'memcpy_16_al14'
-; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 16, i1 false)
+; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 16, i1 false)
; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-SA-LABEL: 'memcpy_16_al14'
-; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 16, i1 false)
+; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 16, i1 false)
; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 16, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 16, i1 false)
ret void
}
; Cost-model test: 32-byte llvm.memcpy, dst align 1 / src align 4.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
-define void @memcpy_32_al14(i8* %d, i8* %s) {
+define void @memcpy_32_al14(ptr %d, ptr %s) {
;
; with/without strict-align:
;
; bl __aeabi_memcpy
;
; COMMON-LABEL: 'memcpy_32_al14'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 32, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 32, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 32, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 32, i1 false)
ret void
}
; Cost-model test: llvm.memcpy with a run-time length (%N), dst align 1 / src align 4.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
-define void @memcpy_N_al14(i8* %d, i8* %s, i32 %N) {
+define void @memcpy_N_al14(ptr %d, ptr %s, i32 %N) {
;
; with/without strict-align:
;
; bl __aeabi_memcpy4
;
; COMMON-LABEL: 'memcpy_N_al14'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 %N, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 %N, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 %N, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %d, ptr align 4 %s, i32 %N, i1 false)
ret void
}
; Align 4, 1
;;;;;;;;;;;;;
; Cost-model test: 1-byte llvm.memcpy, dst align 4 / src align 1.
; A single COMMON check block expects cost 4 for the call and 1 for ret.
; NOTE(review): the other 1-byte memcpy cases visible in this file (align 1/1,
; 2/2, 4/4 and 1/4) all expect cost 2 - confirm that 4 is the intended value.
-define void @memcpy_1_al41(i8* %d, i8* %s) {
+define void @memcpy_1_al41(ptr %d, ptr %s) {
;
; with/without strict-align:
;
; strb r1, [r0]
;
; COMMON-LABEL: 'memcpy_1_al41'
-; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 1 %s, i32 1, i1 false)
+; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 1 %s, i32 1, i1 false)
; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 1 %s, i32 1, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 4 %d, ptr align 1 %s, i32 1, i1 false)
ret void
}
; Declaration of the llvm.memcpy intrinsic overload with an i32 length:
; (dest, src, length, i1 flag). The '-' line uses the typed-pointer mangling
; (p0i8.p0i8), the '+' line the opaque-pointer mangling (p0.p0).
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1
+declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) #1
%T432 = type <4 x i32>
%T464 = type <4 x i64>
; Loads two %T432 (<4 x i32>) vectors, multiplies them element-wise and stores
; the product. COST checks expect 1 per load, 2 for the mul, 1 for the store
; and 0 for ret; ASM checks expect vld1.64, vmul.i32 and vst1.64.
-define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
+define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'direct'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %v0, %v1
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T432, %T432* %loadaddr
+ %v0 = load %T432, ptr %loadaddr
; ASM: vld1.64
- %v1 = load %T432, %T432* %loadaddr2
+ %v1 = load %T432, ptr %loadaddr2
; ASM: vld1.64
%r3 = mul %T432 %v0, %v1
; ASM: vmul.i32
- store %T432 %r3, %T432* %storeaddr
+ store %T432 %r3, ptr %storeaddr
; ASM: vst1.64
ret void
}
; Loads two %T416 vectors (printed as <4 x i16> in the COST lines), sign-extends
; both to %T432 (<4 x i32>), multiplies them and stores the product.
; COST checks expect 1 per load, 0 per sext, 2 for the mul, 1 for the store and
; 0 for ret; ASM checks expect vldr, vmull.s16 and vst1.64.
-define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'ups1632'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32>
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32>
; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %r1, %r2
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T416, %T416* %loadaddr
+ %v0 = load %T416, ptr %loadaddr
; ASM: vldr
- %v1 = load %T416, %T416* %loadaddr2
+ %v1 = load %T416, ptr %loadaddr2
; ASM: vldr
%r1 = sext %T416 %v0 to %T432
%r2 = sext %T416 %v1 to %T432
%r3 = mul %T432 %r1, %r2
; ASM: vmull.s16
- store %T432 %r3, %T432* %storeaddr
+ store %T432 %r3, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'upu1632'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32>
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32>
; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %r1, %r2
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T416, %T416* %loadaddr
+ %v0 = load %T416, ptr %loadaddr
; ASM: vldr
- %v1 = load %T416, %T416* %loadaddr2
+ %v1 = load %T416, ptr %loadaddr2
; ASM: vldr
%r1 = zext %T416 %v0 to %T432
%r2 = zext %T416 %v1 to %T432
%r3 = mul %T432 %r1, %r2
; ASM: vmull.u16
- store %T432 %r3, %T432* %storeaddr
+ store %T432 %r3, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'ups3264'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = mul <2 x i32> %v0, %v1
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64>
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T232, %T232* %loadaddr
+ %v0 = load %T232, ptr %loadaddr
; ASM: vldr
- %v1 = load %T232, %T232* %loadaddr2
+ %v1 = load %T232, ptr %loadaddr2
; ASM: vldr
%r3 = mul %T232 %v0, %v1
; ASM: vmul.i32
%st = sext %T232 %r3 to %T264
; ASM: vmovl.s32
- store %T264 %st, %T264* %storeaddr
+ store %T264 %st, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'upu3264'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = mul <2 x i32> %v0, %v1
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64>
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T232, %T232* %loadaddr
+ %v0 = load %T232, ptr %loadaddr
; ASM: vldr
- %v1 = load %T232, %T232* %loadaddr2
+ %v1 = load %T232, ptr %loadaddr2
; ASM: vldr
%r3 = mul %T232 %v0, %v1
; ASM: vmul.i32
%st = zext %T232 %r3 to %T264
; ASM: vmovl.u32
- store %T264 %st, %T264* %storeaddr
+ store %T264 %st, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
+define void @dn3216(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'dn3216'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %v0, %v1
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16>
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, <4 x i16>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T432, %T432* %loadaddr
+ %v0 = load %T432, ptr %loadaddr
; ASM: vld1.64
- %v1 = load %T432, %T432* %loadaddr2
+ %v1 = load %T432, ptr %loadaddr2
; ASM: vld1.64
%r3 = mul %T432 %v0, %v1
; ASM: vmul.i32
%st = trunc %T432 %r3 to %T416
; ASM: vmovn.i32
- store %T416 %st, %T416* %storeaddr
+ store %T416 %st, ptr %storeaddr
; ASM: vstr
ret void
}
define i32 @masked_gather() {
; CHECK-LABEL: 'masked_gather'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 4, <4 x i1> undef, <4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 4, <2 x i1> undef, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 4, <16 x i1> undef, <16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 4, <8 x i1> undef, <8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 4, <4 x i1> undef, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 4, <2 x i1> undef, <2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0f16(<16 x half*> undef, i32 2, <16 x i1> undef, <16 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> undef, i32 2, <8 x i1> undef, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*> undef, i32 2, <4 x i1> undef, <4 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0f16(<2 x half*> undef, i32 2, <2 x i1> undef, <2 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 4, <2 x i1> undef, <2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 4, <16 x i1> undef, <16 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 4, <8 x i1> undef, <8 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 4, <4 x i1> undef, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 4, <2 x i1> undef, <2 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 2, <16 x i1> undef, <16 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 2, <8 x i1> undef, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 2, <4 x i1> undef, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> undef, i32 2, <2 x i1> undef, <2 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32p = call <4 x i32*> @llvm.masked.gather.v4p0i32.v4p0p0i32(<4 x i32**> undef, i32 4, <4 x i1> undef, <4 x i32*> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32p = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x ptr> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 4, <4 x i1> undef, <4 x double> undef)
- %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 4, <2 x i1> undef, <2 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x double> undef)
- %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 4, <16 x i1> undef, <16 x float> undef)
- %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 4, <8 x i1> undef, <8 x float> undef)
- %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 4, <4 x i1> undef, <4 x float> undef)
- %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 4, <2 x i1> undef, <2 x float> undef)
+ %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x float> undef)
- %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0f16(<16 x half*> undef, i32 2, <16 x i1> undef, <16 x half> undef)
- %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> undef, i32 2, <8 x i1> undef, <8 x half> undef)
- %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*> undef, i32 2, <4 x i1> undef, <4 x half> undef)
- %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0f16(<2 x half*> undef, i32 2, <2 x i1> undef, <2 x half> undef)
+ %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x half> undef)
+ %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x half> undef)
+ %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x half> undef)
+ %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x half> undef)
- %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
- %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 4, <2 x i1> undef, <2 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i64> undef)
- %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 4, <16 x i1> undef, <16 x i32> undef)
- %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 4, <8 x i1> undef, <8 x i32> undef)
- %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 4, <4 x i1> undef, <4 x i32> undef)
- %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+ %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i32> undef)
- %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 2, <16 x i1> undef, <16 x i16> undef)
- %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 2, <8 x i1> undef, <8 x i16> undef)
- %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 2, <4 x i1> undef, <4 x i16> undef)
- %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+ %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x i16> undef)
- %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
- %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
- %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
- %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
- %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+ %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+ %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
- %V4I32p = call <4 x i32*> @llvm.masked.gather.v4p0i32.v4p0p0i32(<4 x i32**> undef, i32 4, <4 x i1> undef, <4 x i32*> undef)
+ %V4I32p = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x ptr> undef)
ret i32 0
}
define i32 @masked_scatter() {
; CHECK-LABEL: 'masked_scatter'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 4, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 4, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16f16.v16p0f16(<16 x half> undef, <16 x half*> undef, i32 2, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> undef, <8 x half*> undef, i32 2, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half> undef, <4 x half*> undef, i32 2, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half> undef, <2 x half*> undef, i32 2, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 4, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 4, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 2, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 2, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 2, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> undef, <2 x i16*> undef, i32 2, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> undef, <4 x i8*> undef, i32 1, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> undef, <2 x i8*> undef, i32 1, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 4, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 4, <2 x i1> undef)
-
- call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 4, <16 x i1> undef)
- call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 4, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 4, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 4, <2 x i1> undef)
-
- call void @llvm.masked.scatter.v16f16.v16p0f16(<16 x half> undef, <16 x half*> undef, i32 2, <16 x i1> undef)
- call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> undef, <8 x half*> undef, i32 2, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half> undef, <4 x half*> undef, i32 2, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half> undef, <2 x half*> undef, i32 2, <2 x i1> undef)
-
- call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 4, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 4, <2 x i1> undef)
-
- call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 4, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 4, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 4, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 4, <2 x i1> undef)
-
- call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 2, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 2, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 2, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> undef, <2 x i16*> undef, i32 2, <2 x i1> undef)
-
- call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> undef, <4 x i8*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> undef, <2 x i8*> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+
+ call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+
+ call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+
+ call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+
+ call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+
+ call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+
+ call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+ call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
ret i32 0
}
-define void @gep_v4i32(i32* %base, i16* %base16, i8* %base8, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %mask) {
+define void @gep_v4i32(ptr %base, ptr %base16, ptr %base8, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %mask) {
; CHECK-LABEL: 'gep_v4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i32, i32* %base, <4 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep1, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %res1, <4 x i32*> %gep1, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i32, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res1, <4 x ptr> %gep1, i32 4, <4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %indzext = zext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i32, i32* %base, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res2 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep2, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %res2, <4 x i32*> %gep2, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i32, ptr %base, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res2 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep2, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res2, <4 x ptr> %gep2, i32 4, <4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %indsext = sext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i32, i32* %base, <4 x i32> %indsext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep3, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %res3, <4 x i32*> %gep3, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepu = getelementptr i32, i32* %base, <4 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %resu = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gepu, i32 1, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %resu, <4 x i32*> %gepu, i32 1, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, i8* %base8, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <4 x i8*> %gepos to <4 x i32*>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %resos = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %geposb, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %resos, <4 x i32*> %geposb, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i16, i16* %base16, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <4 x i16*> %gepbs to <4 x i32*>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %resbs = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gepbsb, i32 4, <4 x i1> %mask, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %resbs, <4 x i32*> %gepbsb, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i32, ptr %base, <4 x i32> %indsext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep3, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res3, <4 x ptr> %gep3, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepu = getelementptr i32, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %resu = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gepu, i32 1, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resu, <4 x ptr> %gepu, i32 1, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, ptr %base8, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <4 x ptr> %gepos to <4 x ptr>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %resos = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %geposb, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resos, <4 x ptr> %geposb, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i16, ptr %base16, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <4 x ptr> %gepbs to <4 x ptr>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %resbs = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gepbsb, i32 4, <4 x i1> %mask, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resbs, <4 x ptr> %gepbsb, i32 4, <4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %gep1 = getelementptr i32, i32* %base, <4 x i32> %ind32
- %res1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep1, i32 4, <4 x i1> %mask, <4 x i32> undef)
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %res1, <4 x i32*> %gep1, i32 4, <4 x i1> %mask)
+ %gep1 = getelementptr i32, ptr %base, <4 x i32> %ind32
+ %res1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x i32> undef)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res1, <4 x ptr> %gep1, i32 4, <4 x i1> %mask)
%indzext = zext <4 x i16> %ind16 to <4 x i32>
- %gep2 = getelementptr i32, i32* %base, <4 x i32> %indzext
- %res2 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep2, i32 4, <4 x i1> %mask, <4 x i32> undef)
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %res2, <4 x i32*> %gep2, i32 4, <4 x i1> %mask)
+ %gep2 = getelementptr i32, ptr %base, <4 x i32> %indzext
+ %res2 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep2, i32 4, <4 x i1> %mask, <4 x i32> undef)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res2, <4 x ptr> %gep2, i32 4, <4 x i1> %mask)
%indsext = sext <4 x i16> %ind16 to <4 x i32>
- %gep3 = getelementptr i32, i32* %base, <4 x i32> %indsext
- %res3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep3, i32 4, <4 x i1> %mask, <4 x i32> undef)
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %res3, <4 x i32*> %gep3, i32 4, <4 x i1> %mask)
+ %gep3 = getelementptr i32, ptr %base, <4 x i32> %indsext
+ %res3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep3, i32 4, <4 x i1> %mask, <4 x i32> undef)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %res3, <4 x ptr> %gep3, i32 4, <4 x i1> %mask)
; unaligned
- %gepu = getelementptr i32, i32* %base, <4 x i32> %ind32
- %resu = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gepu, i32 1, <4 x i1> %mask, <4 x i32> undef)
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %resu, <4 x i32*> %gepu, i32 1, <4 x i1> %mask)
+ %gepu = getelementptr i32, ptr %base, <4 x i32> %ind32
+ %resu = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gepu, i32 1, <4 x i1> %mask, <4 x i32> undef)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resu, <4 x ptr> %gepu, i32 1, <4 x i1> %mask)
; 1 scale
- %gepos = getelementptr i8, i8* %base8, <4 x i32> %indzext
- %geposb = bitcast <4 x i8*> %gepos to <4 x i32*>
- %resos = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %geposb, i32 4, <4 x i1> %mask, <4 x i32> undef)
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %resos, <4 x i32*> %geposb, i32 4, <4 x i1> %mask)
+ %gepos = getelementptr i8, ptr %base8, <4 x i32> %indzext
+ %geposb = bitcast <4 x ptr> %gepos to <4 x ptr>
+ %resos = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %geposb, i32 4, <4 x i1> %mask, <4 x i32> undef)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resos, <4 x ptr> %geposb, i32 4, <4 x i1> %mask)
; bad scale (but doesn't really matter because i32)
- %gepbs = getelementptr i16, i16* %base16, <4 x i32> %indzext
- %gepbsb = bitcast <4 x i16*> %gepbs to <4 x i32*>
- %resbs = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gepbsb, i32 4, <4 x i1> %mask, <4 x i32> undef)
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %resbs, <4 x i32*> %gepbsb, i32 4, <4 x i1> %mask)
+ %gepbs = getelementptr i16, ptr %base16, <4 x i32> %indzext
+ %gepbsb = bitcast <4 x ptr> %gepbs to <4 x ptr>
+ %resbs = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gepbsb, i32 4, <4 x i1> %mask, <4 x i32> undef)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %resbs, <4 x ptr> %gepbsb, i32 4, <4 x i1> %mask)
ret void
}
-define void @gep_v4f32(float* %base, i16* %base16, i8* %base8, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %mask) {
+define void @gep_v4f32(ptr %base, ptr %base16, ptr %base8, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %mask) {
; CHECK-LABEL: 'gep_v4f32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep1 = getelementptr float, float* %base, <4 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res1 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep1, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %res1, <4 x float*> %gep1, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep1 = getelementptr float, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res1 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res1, <4 x ptr> %gep1, i32 4, <4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %indzext = zext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep2 = getelementptr float, float* %base, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res2 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep2, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %res2, <4 x float*> %gep2, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep2 = getelementptr float, ptr %base, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res2 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep2, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res2, <4 x ptr> %gep2, i32 4, <4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %indsext = sext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep3 = getelementptr float, float* %base, <4 x i32> %indsext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res3 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep3, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %res3, <4 x float*> %gep3, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gepu = getelementptr float, float* %base, <4 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %resu = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gepu, i32 1, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %resu, <4 x float*> %gepu, i32 1, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, i8* %base8, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <4 x i8*> %gepos to <4 x float*>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %resos = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %geposb, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %resos, <4 x float*> %geposb, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i16, i16* %base16, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <4 x i16*> %gepbs to <4 x float*>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %resbs = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gepbsb, i32 4, <4 x i1> %mask, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %resbs, <4 x float*> %gepbsb, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep3 = getelementptr float, ptr %base, <4 x i32> %indsext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res3 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep3, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res3, <4 x ptr> %gep3, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gepu = getelementptr float, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %resu = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gepu, i32 1, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resu, <4 x ptr> %gepu, i32 1, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, ptr %base8, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <4 x ptr> %gepos to <4 x ptr>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %resos = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %geposb, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resos, <4 x ptr> %geposb, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i16, ptr %base16, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <4 x ptr> %gepbs to <4 x ptr>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %resbs = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gepbsb, i32 4, <4 x i1> %mask, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resbs, <4 x ptr> %gepbsb, i32 4, <4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %gep1 = getelementptr float, float* %base, <4 x i32> %ind32
- %res1 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep1, i32 4, <4 x i1> %mask, <4 x float> undef)
- call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %res1, <4 x float*> %gep1, i32 4, <4 x i1> %mask)
+ %gep1 = getelementptr float, ptr %base, <4 x i32> %ind32
+ %res1 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep1, i32 4, <4 x i1> %mask, <4 x float> undef)
+ call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res1, <4 x ptr> %gep1, i32 4, <4 x i1> %mask)
%indzext = zext <4 x i16> %ind16 to <4 x i32>
- %gep2 = getelementptr float, float* %base, <4 x i32> %indzext
- %res2 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep2, i32 4, <4 x i1> %mask, <4 x float> undef)
- call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %res2, <4 x float*> %gep2, i32 4, <4 x i1> %mask)
+ %gep2 = getelementptr float, ptr %base, <4 x i32> %indzext
+ %res2 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep2, i32 4, <4 x i1> %mask, <4 x float> undef)
+ call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res2, <4 x ptr> %gep2, i32 4, <4 x i1> %mask)
%indsext = sext <4 x i16> %ind16 to <4 x i32>
- %gep3 = getelementptr float, float* %base, <4 x i32> %indsext
- %res3 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep3, i32 4, <4 x i1> %mask, <4 x float> undef)
- call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %res3, <4 x float*> %gep3, i32 4, <4 x i1> %mask)
+ %gep3 = getelementptr float, ptr %base, <4 x i32> %indsext
+ %res3 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep3, i32 4, <4 x i1> %mask, <4 x float> undef)
+ call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %res3, <4 x ptr> %gep3, i32 4, <4 x i1> %mask)
; unaligned
- %gepu = getelementptr float, float* %base, <4 x i32> %ind32
- %resu = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gepu, i32 1, <4 x i1> %mask, <4 x float> undef)
- call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %resu, <4 x float*> %gepu, i32 1, <4 x i1> %mask)
+ %gepu = getelementptr float, ptr %base, <4 x i32> %ind32
+ %resu = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gepu, i32 1, <4 x i1> %mask, <4 x float> undef)
+ call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resu, <4 x ptr> %gepu, i32 1, <4 x i1> %mask)
; 1 scale
- %gepos = getelementptr i8, i8* %base8, <4 x i32> %indzext
- %geposb = bitcast <4 x i8*> %gepos to <4 x float*>
- %resos = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %geposb, i32 4, <4 x i1> %mask, <4 x float> undef)
- call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %resos, <4 x float*> %geposb, i32 4, <4 x i1> %mask)
+ %gepos = getelementptr i8, ptr %base8, <4 x i32> %indzext
+ %geposb = bitcast <4 x ptr> %gepos to <4 x ptr>
+ %resos = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %geposb, i32 4, <4 x i1> %mask, <4 x float> undef)
+ call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resos, <4 x ptr> %geposb, i32 4, <4 x i1> %mask)
; bad scale (but doesn't really matter because i32)
- %gepbs = getelementptr i16, i16* %base16, <4 x i32> %indzext
- %gepbsb = bitcast <4 x i16*> %gepbs to <4 x float*>
- %resbs = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gepbsb, i32 4, <4 x i1> %mask, <4 x float> undef)
- call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %resbs, <4 x float*> %gepbsb, i32 4, <4 x i1> %mask)
+ %gepbs = getelementptr i16, ptr %base16, <4 x i32> %indzext
+ %gepbsb = bitcast <4 x ptr> %gepbs to <4 x ptr>
+ %resbs = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gepbsb, i32 4, <4 x i1> %mask, <4 x float> undef)
+ call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %resbs, <4 x ptr> %gepbsb, i32 4, <4 x i1> %mask)
ret void
}
-define void @gep_v4i16(i16* %base, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %mask) {
+define void @gep_v4i16(ptr %base, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> %mask) {
; CHECK-LABEL: 'gep_v4i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i16, i16* %base, <4 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %res1 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %gep1, i32 2, <4 x i1> %mask, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %res1, <4 x i16*> %gep1, i32 2, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i16, ptr %base, <4 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %res1 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep1, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res1, <4 x ptr> %gep1, i32 2, <4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %indzext = zext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i16, i16* %base, <4 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %res2 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %gep2, i32 2, <4 x i1> %mask, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %res2, <4 x i16*> %gep2, i32 2, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i16, ptr %base, <4 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %res2 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep2, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res2, <4 x ptr> %gep2, i32 2, <4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %indsext = sext <4 x i16> %ind16 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i16, i16* %base, <4 x i32> %indsext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %res3 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %gep3, i32 2, <4 x i1> %mask, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %res3, <4 x i16*> %gep3, i32 2, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep5 = getelementptr i16, i16* %base, <4 x i16> %ind16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res5 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i16, ptr %base, <4 x i32> %indsext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %res3 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep3, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res3, <4 x ptr> %gep3, i32 2, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep5 = getelementptr i16, ptr %base, <4 x i16> %ind16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res5 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res5zext = zext <4 x i16> %res5 to <4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res5trunc = trunc <4 x i32> %res5zext to <4 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %res5trunc, <4 x i16*> %gep5, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res5trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res6sext = sext <4 x i16> %res6 to <4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <4 x i32> %res6sext to <4 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %res6trunc, <4 x i16*> %gep5, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res6trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %gep1 = getelementptr i16, i16* %base, <4 x i32> %ind32
- %res1 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %gep1, i32 2, <4 x i1> %mask, <4 x i16> undef)
- call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %res1, <4 x i16*> %gep1, i32 2, <4 x i1> %mask)
+ %gep1 = getelementptr i16, ptr %base, <4 x i32> %ind32
+ %res1 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep1, i32 2, <4 x i1> %mask, <4 x i16> undef)
+ call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res1, <4 x ptr> %gep1, i32 2, <4 x i1> %mask)
%indzext = zext <4 x i16> %ind16 to <4 x i32>
- %gep2 = getelementptr i16, i16* %base, <4 x i32> %indzext
- %res2 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %gep2, i32 2, <4 x i1> %mask, <4 x i16> undef)
- call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %res2, <4 x i16*> %gep2, i32 2, <4 x i1> %mask)
+ %gep2 = getelementptr i16, ptr %base, <4 x i32> %indzext
+ %res2 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep2, i32 2, <4 x i1> %mask, <4 x i16> undef)
+ call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res2, <4 x ptr> %gep2, i32 2, <4 x i1> %mask)
%indsext = sext <4 x i16> %ind16 to <4 x i32>
- %gep3 = getelementptr i16, i16* %base, <4 x i32> %indsext
- %res3 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %gep3, i32 2, <4 x i1> %mask, <4 x i16> undef)
- call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %res3, <4 x i16*> %gep3, i32 2, <4 x i1> %mask)
+ %gep3 = getelementptr i16, ptr %base, <4 x i32> %indsext
+ %res3 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep3, i32 2, <4 x i1> %mask, <4 x i16> undef)
+ call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res3, <4 x ptr> %gep3, i32 2, <4 x i1> %mask)
; result zext
- %gep5 = getelementptr i16, i16* %base, <4 x i16> %ind16
- %res5 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
+ %gep5 = getelementptr i16, ptr %base, <4 x i16> %ind16
+ %res5 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
%res5zext = zext <4 x i16> %res5 to <4 x i32>
%res5trunc = trunc <4 x i32> %res5zext to <4 x i16>
- call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %res5trunc, <4 x i16*> %gep5, i32 4, <4 x i1> %mask)
+ call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res5trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
; result sext
- %res6 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
+ %res6 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i16> undef)
%res6sext = sext <4 x i16> %res6 to <4 x i32>
%res6trunc = trunc <4 x i32> %res6sext to <4 x i16>
- call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %res6trunc, <4 x i16*> %gep5, i32 4, <4 x i1> %mask)
+ call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %res6trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
ret void
}
-define void @gep_v4i8(i8* %base, <4 x i8> %ind8, <4 x i1> %mask) {
+define void @gep_v4i8(ptr %base, <4 x i8> %ind8, <4 x i1> %mask) {
; CHECK-LABEL: 'gep_v4i8'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep5 = getelementptr i8, i8* %base, <4 x i8> %ind8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res5 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep5 = getelementptr i8, ptr %base, <4 x i8> %ind8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res5 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res5zext = zext <4 x i8> %res5 to <4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res5trunc = trunc <4 x i32> %res5zext to <4 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %res5trunc, <4 x i8*> %gep5, i32 4, <4 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %res5trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res6 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res6sext = sext <4 x i8> %res6 to <4 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <4 x i32> %res6sext to <4 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %res6trunc, <4 x i8*> %gep5, i32 4, <4 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %res6trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; result zext
- %gep5 = getelementptr i8, i8* %base, <4 x i8> %ind8
- %res5 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
+ %gep5 = getelementptr i8, ptr %base, <4 x i8> %ind8
+ %res5 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
%res5zext = zext <4 x i8> %res5 to <4 x i32>
%res5trunc = trunc <4 x i32> %res5zext to <4 x i8>
- call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %res5trunc, <4 x i8*> %gep5, i32 4, <4 x i1> %mask)
+ call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %res5trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
; result sext
- %res6 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
+ %res6 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %gep5, i32 2, <4 x i1> %mask, <4 x i8> undef)
%res6sext = sext <4 x i8> %res6 to <4 x i32>
%res6trunc = trunc <4 x i32> %res6sext to <4 x i8>
- call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %res6trunc, <4 x i8*> %gep5, i32 4, <4 x i1> %mask)
+ call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %res6trunc, <4 x ptr> %gep5, i32 4, <4 x i1> %mask)
ret void
}
-define void @gep_v8i16(i16* %base, i8* %base8, i32* %base32, <8 x i32> %ind32, <8 x i16> %ind16, <8 x i8> %ind8, <8 x i1> %mask) {
+define void @gep_v8i16(ptr %base, ptr %base8, ptr %base32, <8 x i32> %ind32, <8 x i16> %ind16, <8 x i8> %ind8, <8 x i1> %mask) {
; CHECK-LABEL: 'gep_v8i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i16, i16* %base, <8 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %res1 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep1, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %res1, <8 x i16*> %gep1, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i16, ptr %base, <8 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %res1 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep1, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res1, <8 x ptr> %gep1, i32 2, <8 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indzext = zext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i16, i16* %base, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res2 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep2, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %res2, <8 x i16*> %gep2, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i16, ptr %base, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res2 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep2, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res2, <8 x ptr> %gep2, i32 2, <8 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indsext = sext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i16, i16* %base, <8 x i32> %indsext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %res3 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep3, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %res3, <8 x i16*> %gep3, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %resu = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep2, i32 1, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %resu, <8 x i16*> %gep2, i32 1, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, i8* %base8, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <8 x i8*> %gepos to <8 x i16*>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %resos = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %geposb, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %resos, <8 x i16*> %geposb, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i32, i32* %base32, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <8 x i32*> %gepbs to <8 x i16*>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %resbs = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gepbsb, i32 2, <8 x i1> %mask, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %resbs, <8 x i16*> %gepbsb, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i16, ptr %base, <8 x i32> %indsext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %res3 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep3, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res3, <8 x ptr> %gep3, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %resu = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep2, i32 1, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resu, <8 x ptr> %gep2, i32 1, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, ptr %base8, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <8 x ptr> %gepos to <8 x ptr>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %resos = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %geposb, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resos, <8 x ptr> %geposb, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i32, ptr %base32, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <8 x ptr> %gepbs to <8 x ptr>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %resbs = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gepbsb, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resbs, <8 x ptr> %gepbsb, i32 2, <8 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indzext4 = zext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep4 = getelementptr i16, i16* %base, <8 x i32> %indzext4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep4 = getelementptr i16, ptr %base, <8 x i32> %indzext4
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %indtrunc = trunc <8 x i32> %ind32 to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %indtrunc, <8 x i16*> %gep4, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep4, i32 2, <8 x i1> %mask, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %indtrunc, <8 x ptr> %gep4, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep4, i32 2, <8 x i1> %mask, <8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ressext = sext <8 x i16> %res to <8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %restrunc = trunc <8 x i32> %ressext to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %restrunc, <8 x i16*> %gep4, i32 4, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %restrunc, <8 x ptr> %gep4, i32 4, <8 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; no offset ext
- %gep1 = getelementptr i16, i16* %base, <8 x i32> %ind32
- %res1 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep1, i32 2, <8 x i1> %mask, <8 x i16> undef)
- call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %res1, <8 x i16*> %gep1, i32 2, <8 x i1> %mask)
+ %gep1 = getelementptr i16, ptr %base, <8 x i32> %ind32
+ %res1 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep1, i32 2, <8 x i1> %mask, <8 x i16> undef)
+ call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res1, <8 x ptr> %gep1, i32 2, <8 x i1> %mask)
; offset zext
%indzext = zext <8 x i16> %ind16 to <8 x i32>
- %gep2 = getelementptr i16, i16* %base, <8 x i32> %indzext
- %res2 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep2, i32 2, <8 x i1> %mask, <8 x i16> undef)
- call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %res2, <8 x i16*> %gep2, i32 2, <8 x i1> %mask)
+ %gep2 = getelementptr i16, ptr %base, <8 x i32> %indzext
+ %res2 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep2, i32 2, <8 x i1> %mask, <8 x i16> undef)
+ call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res2, <8 x ptr> %gep2, i32 2, <8 x i1> %mask)
; offset sext
%indsext = sext <8 x i16> %ind16 to <8 x i32>
- %gep3 = getelementptr i16, i16* %base, <8 x i32> %indsext
- %res3 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep3, i32 2, <8 x i1> %mask, <8 x i16> undef)
- call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %res3, <8 x i16*> %gep3, i32 2, <8 x i1> %mask)
+ %gep3 = getelementptr i16, ptr %base, <8 x i32> %indsext
+ %res3 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep3, i32 2, <8 x i1> %mask, <8 x i16> undef)
+ call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %res3, <8 x ptr> %gep3, i32 2, <8 x i1> %mask)
; unaligned
- %resu = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep2, i32 1, <8 x i1> %mask, <8 x i16> undef)
- call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %resu, <8 x i16*> %gep2, i32 1, <8 x i1> %mask)
+ %resu = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep2, i32 1, <8 x i1> %mask, <8 x i16> undef)
+ call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resu, <8 x ptr> %gep2, i32 1, <8 x i1> %mask)
; 1 scale
- %gepos = getelementptr i8, i8* %base8, <8 x i32> %indzext
- %geposb = bitcast <8 x i8*> %gepos to <8 x i16*>
- %resos = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %geposb, i32 2, <8 x i1> %mask, <8 x i16> undef)
- call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %resos, <8 x i16*> %geposb, i32 2, <8 x i1> %mask)
+ %gepos = getelementptr i8, ptr %base8, <8 x i32> %indzext
+ %geposb = bitcast <8 x ptr> %gepos to <8 x ptr>
+ %resos = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %geposb, i32 2, <8 x i1> %mask, <8 x i16> undef)
+ call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resos, <8 x ptr> %geposb, i32 2, <8 x i1> %mask)
; bad scale
- %gepbs = getelementptr i32, i32* %base32, <8 x i32> %indzext
- %gepbsb = bitcast <8 x i32*> %gepbs to <8 x i16*>
- %resbs = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gepbsb, i32 2, <8 x i1> %mask, <8 x i16> undef)
- call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %resbs, <8 x i16*> %gepbsb, i32 2, <8 x i1> %mask)
+ %gepbs = getelementptr i32, ptr %base32, <8 x i32> %indzext
+ %gepbsb = bitcast <8 x ptr> %gepbs to <8 x ptr>
+ %resbs = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gepbsb, i32 2, <8 x i1> %mask, <8 x i16> undef)
+ call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %resbs, <8 x ptr> %gepbsb, i32 2, <8 x i1> %mask)
; trunc scatter
%indzext4 = zext <8 x i16> %ind16 to <8 x i32>
- %gep4 = getelementptr i16, i16* %base, <8 x i32> %indzext4
+ %gep4 = getelementptr i16, ptr %base, <8 x i32> %indzext4
%indtrunc = trunc <8 x i32> %ind32 to <8 x i16>
- call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %indtrunc, <8 x i16*> %gep4, i32 2, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %indtrunc, <8 x ptr> %gep4, i32 2, <8 x i1> %mask)
; ext result to <8 x i32>
- %res = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep4, i32 2, <8 x i1> %mask, <8 x i16> undef)
+ %res = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %gep4, i32 2, <8 x i1> %mask, <8 x i16> undef)
%ressext = sext <8 x i16> %res to <8 x i32>
%restrunc = trunc <8 x i32> %ressext to <8 x i16>
- call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %restrunc, <8 x i16*> %gep4, i32 4, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %restrunc, <8 x ptr> %gep4, i32 4, <8 x i1> %mask)
ret void
}
-define void @gep_v8f16(half* %base, i8* %base8, i32* %base32, <8 x i32> %ind32, <8 x i16> %ind16, <8 x i1> %mask) {
+define void @gep_v8f16(ptr %base, ptr %base8, ptr %base32, <8 x i32> %ind32, <8 x i16> %ind16, <8 x i1> %mask) {
; CHECK-LABEL: 'gep_v8f16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep1 = getelementptr half, half* %base, <8 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res1 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep1, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %res1, <8 x half*> %gep1, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep1 = getelementptr half, ptr %base, <8 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res1 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep1, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res1, <8 x ptr> %gep1, i32 2, <8 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indzext = zext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep2 = getelementptr half, half* %base, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res2 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep2, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %res2, <8 x half*> %gep2, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep2 = getelementptr half, ptr %base, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res2 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep2, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res2, <8 x ptr> %gep2, i32 2, <8 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indsext = sext <8 x i16> %ind16 to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep3 = getelementptr half, half* %base, <8 x i32> %indsext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res3 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep3, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %res3, <8 x half*> %gep3, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %resu = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep2, i32 1, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %resu, <8 x half*> %gep2, i32 1, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, i8* %base8, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <8 x i8*> %gepos to <8 x half*>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %resos = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %geposb, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %resos, <8 x half*> %geposb, i32 2, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i32, i32* %base32, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <8 x i32*> %gepbs to <8 x half*>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %resbs = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gepbsb, i32 2, <8 x i1> %mask, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %resbs, <8 x half*> %gepbsb, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep3 = getelementptr half, ptr %base, <8 x i32> %indsext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res3 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep3, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res3, <8 x ptr> %gep3, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %resu = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep2, i32 1, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resu, <8 x ptr> %gep2, i32 1, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, ptr %base8, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <8 x ptr> %gepos to <8 x ptr>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %resos = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %geposb, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resos, <8 x ptr> %geposb, i32 2, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i32, ptr %base32, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <8 x ptr> %gepbs to <8 x ptr>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %resbs = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gepbsb, i32 2, <8 x i1> %mask, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resbs, <8 x ptr> %gepbsb, i32 2, <8 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; no offset ext
- %gep1 = getelementptr half, half* %base, <8 x i32> %ind32
- %res1 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep1, i32 2, <8 x i1> %mask, <8 x half> undef)
- call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %res1, <8 x half*> %gep1, i32 2, <8 x i1> %mask)
+ %gep1 = getelementptr half, ptr %base, <8 x i32> %ind32
+ %res1 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep1, i32 2, <8 x i1> %mask, <8 x half> undef)
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res1, <8 x ptr> %gep1, i32 2, <8 x i1> %mask)
; offset zext
%indzext = zext <8 x i16> %ind16 to <8 x i32>
- %gep2 = getelementptr half, half* %base, <8 x i32> %indzext
- %res2 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep2, i32 2, <8 x i1> %mask, <8 x half> undef)
- call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %res2, <8 x half*> %gep2, i32 2, <8 x i1> %mask)
+ %gep2 = getelementptr half, ptr %base, <8 x i32> %indzext
+ %res2 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep2, i32 2, <8 x i1> %mask, <8 x half> undef)
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res2, <8 x ptr> %gep2, i32 2, <8 x i1> %mask)
; offset sext
%indsext = sext <8 x i16> %ind16 to <8 x i32>
- %gep3 = getelementptr half, half* %base, <8 x i32> %indsext
- %res3 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep3, i32 2, <8 x i1> %mask, <8 x half> undef)
- call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %res3, <8 x half*> %gep3, i32 2, <8 x i1> %mask)
+ %gep3 = getelementptr half, ptr %base, <8 x i32> %indsext
+ %res3 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep3, i32 2, <8 x i1> %mask, <8 x half> undef)
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %res3, <8 x ptr> %gep3, i32 2, <8 x i1> %mask)
; unaligned
- %resu = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep2, i32 1, <8 x i1> %mask, <8 x half> undef)
- call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %resu, <8 x half*> %gep2, i32 1, <8 x i1> %mask)
+ %resu = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gep2, i32 1, <8 x i1> %mask, <8 x half> undef)
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resu, <8 x ptr> %gep2, i32 1, <8 x i1> %mask)
; 1 scale
- %gepos = getelementptr i8, i8* %base8, <8 x i32> %indzext
- %geposb = bitcast <8 x i8*> %gepos to <8 x half*>
- %resos = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %geposb, i32 2, <8 x i1> %mask, <8 x half> undef)
- call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %resos, <8 x half*> %geposb, i32 2, <8 x i1> %mask)
+ %gepos = getelementptr i8, ptr %base8, <8 x i32> %indzext
+ %geposb = bitcast <8 x ptr> %gepos to <8 x ptr>
+ %resos = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %geposb, i32 2, <8 x i1> %mask, <8 x half> undef)
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resos, <8 x ptr> %geposb, i32 2, <8 x i1> %mask)
; bad scale
- %gepbs = getelementptr i32, i32* %base32, <8 x i32> %indzext
- %gepbsb = bitcast <8 x i32*> %gepbs to <8 x half*>
- %resbs = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gepbsb, i32 2, <8 x i1> %mask, <8 x half> undef)
- call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %resbs, <8 x half*> %gepbsb, i32 2, <8 x i1> %mask)
+ %gepbs = getelementptr i32, ptr %base32, <8 x i32> %indzext
+ %gepbsb = bitcast <8 x ptr> %gepbs to <8 x ptr>
+ %resbs = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %gepbsb, i32 2, <8 x i1> %mask, <8 x half> undef)
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %resbs, <8 x ptr> %gepbsb, i32 2, <8 x i1> %mask)
ret void
}
-define void @gep_v8i8(i8* %base, <8 x i8> %ind8, <8 x i1> %mask) {
+define void @gep_v8i8(ptr %base, <8 x i8> %ind8, <8 x i1> %mask) {
; CHECK-LABEL: 'gep_v8i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %indzext = zext <8 x i8> %ind8 to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep5 = getelementptr i8, i8* %base, <8 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res5 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep5 = getelementptr i8, ptr %base, <8 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res5 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res5zext = zext <8 x i8> %res5 to <8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res5trunc = trunc <8 x i16> %res5zext to <8 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %res5trunc, <8 x i8*> %gep5, i32 4, <8 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res6 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %res5trunc, <8 x ptr> %gep5, i32 4, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res6 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res6sext = sext <8 x i8> %res6 to <8 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <8 x i16> %res6sext to <8 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %res6trunc, <8 x i8*> %gep5, i32 4, <8 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %res6trunc, <8 x ptr> %gep5, i32 4, <8 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; result zext
%indzext = zext <8 x i8> %ind8 to <8 x i32>
- %gep5 = getelementptr i8, i8* %base, <8 x i32> %indzext
- %res5 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
+ %gep5 = getelementptr i8, ptr %base, <8 x i32> %indzext
+ %res5 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
%res5zext = zext <8 x i8> %res5 to <8 x i16>
%res5trunc = trunc <8 x i16> %res5zext to <8 x i8>
- call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %res5trunc, <8 x i8*> %gep5, i32 4, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %res5trunc, <8 x ptr> %gep5, i32 4, <8 x i1> %mask)
; result sext
- %res6 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
+ %res6 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %gep5, i32 2, <8 x i1> %mask, <8 x i8> undef)
%res6sext = sext <8 x i8> %res6 to <8 x i16>
%res6trunc = trunc <8 x i16> %res6sext to <8 x i8>
- call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %res6trunc, <8 x i8*> %gep5, i32 4, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %res6trunc, <8 x ptr> %gep5, i32 4, <8 x i1> %mask)
ret void
}
-define void @gep_v16i8(i8* %base, i16* %base16, <16 x i8> %ind8, <16 x i32> %ind32, <16 x i1> %mask) {
+define void @gep_v16i8(ptr %base, ptr %base16, <16 x i8> %ind8, <16 x i32> %ind32, <16 x i1> %mask) {
; CHECK-LABEL: 'gep_v16i8'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i8, i8* %base, <16 x i32> %ind32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %res1 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gep1, i32 1, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %res1, <16 x i8*> %gep1, i32 2, <16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i8, ptr %base, <16 x i32> %ind32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %res1 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep1, i32 1, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res1, <16 x ptr> %gep1, i32 2, <16 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %indzext = zext <16 x i8> %ind8 to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i8, i8* %base, <16 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res2 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gep2, i32 2, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %res2, <16 x i8*> %gep2, i32 2, <16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i8, ptr %base, <16 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res2 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep2, i32 2, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res2, <16 x ptr> %gep2, i32 2, <16 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %indsext = sext <16 x i8> %ind8 to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i8, i8* %base, <16 x i32> %indsext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %res3 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gep3, i32 2, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %res3, <16 x i8*> %gep3, i32 2, <16 x i1> %mask)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i16, i16* %base16, <16 x i32> %indzext
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <16 x i16*> %gepbs to <16 x i8*>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gepbsb, i32 2, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %resbs, <16 x i8*> %gepbsb, i32 2, <16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i8, ptr %base, <16 x i32> %indsext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %res3 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep3, i32 2, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res3, <16 x ptr> %gep3, i32 2, <16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i16, ptr %base16, <16 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <16 x ptr> %gepbs to <16 x ptr>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbsb, i32 2, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %resbs, <16 x ptr> %gepbsb, i32 2, <16 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %indzext4 = zext <16 x i8> %ind8 to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep4 = getelementptr i8, i8* %base, <16 x i32> %indzext
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep4 = getelementptr i8, ptr %base, <16 x i32> %indzext
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %indtrunc = trunc <16 x i32> %ind32 to <16 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %indtrunc, <16 x i8*> %gep4, i32 2, <16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %indtrunc, <16 x ptr> %gep4, i32 2, <16 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; no offset ext
- %gep1 = getelementptr i8, i8* %base, <16 x i32> %ind32
- %res1 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gep1, i32 1, <16 x i1> %mask, <16 x i8> undef)
- call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %res1, <16 x i8*> %gep1, i32 2, <16 x i1> %mask)
+ %gep1 = getelementptr i8, ptr %base, <16 x i32> %ind32
+ %res1 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep1, i32 1, <16 x i1> %mask, <16 x i8> undef)
+ call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res1, <16 x ptr> %gep1, i32 2, <16 x i1> %mask)
; offset zext
%indzext = zext <16 x i8> %ind8 to <16 x i32>
- %gep2 = getelementptr i8, i8* %base, <16 x i32> %indzext
- %res2 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gep2, i32 2, <16 x i1> %mask, <16 x i8> undef)
- call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %res2, <16 x i8*> %gep2, i32 2, <16 x i1> %mask)
+ %gep2 = getelementptr i8, ptr %base, <16 x i32> %indzext
+ %res2 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep2, i32 2, <16 x i1> %mask, <16 x i8> undef)
+ call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res2, <16 x ptr> %gep2, i32 2, <16 x i1> %mask)
; offset sext
%indsext = sext <16 x i8> %ind8 to <16 x i32>
- %gep3 = getelementptr i8, i8* %base, <16 x i32> %indsext
- %res3 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gep3, i32 2, <16 x i1> %mask, <16 x i8> undef)
- call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %res3, <16 x i8*> %gep3, i32 2, <16 x i1> %mask)
+ %gep3 = getelementptr i8, ptr %base, <16 x i32> %indsext
+ %res3 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gep3, i32 2, <16 x i1> %mask, <16 x i8> undef)
+ call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %res3, <16 x ptr> %gep3, i32 2, <16 x i1> %mask)
; Bad scale
- %gepbs = getelementptr i16, i16* %base16, <16 x i32> %indzext
- %gepbsb = bitcast <16 x i16*> %gepbs to <16 x i8*>
- %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gepbsb, i32 2, <16 x i1> %mask, <16 x i8> undef)
- call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %resbs, <16 x i8*> %gepbsb, i32 2, <16 x i1> %mask)
+ %gepbs = getelementptr i16, ptr %base16, <16 x i32> %indzext
+ %gepbsb = bitcast <16 x ptr> %gepbs to <16 x ptr>
+ %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbsb, i32 2, <16 x i1> %mask, <16 x i8> undef)
+ call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %resbs, <16 x ptr> %gepbsb, i32 2, <16 x i1> %mask)
; trunc scatter
%indzext4 = zext <16 x i8> %ind8 to <16 x i32>
- %gep4 = getelementptr i8, i8* %base, <16 x i32> %indzext
+ %gep4 = getelementptr i8, ptr %base, <16 x i32> %indzext
%indtrunc = trunc <16 x i32> %ind32 to <16 x i8>
- call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %indtrunc, <16 x i8*> %gep4, i32 2, <16 x i1> %mask)
+ call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %indtrunc, <16 x ptr> %gep4, i32 2, <16 x i1> %mask)
ret void
}
-define void @gep_v16i8p(<16 x i8*> %base, i32 %off, <16 x i1> %mask) {
+define void @gep_v16i8p(<16 x ptr> %base, i32 %off, <16 x i1> %mask) {
; CHECK-LABEL: 'gep_v16i8p'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i8, <16 x i8*> %base, i32 %off
-; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %resbs, <16 x i8*> %gepbs, i32 2, <16 x i1> %mask)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i8, <16 x ptr> %base, i32 %off
+; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %resbs, <16 x ptr> %gepbs, i32 2, <16 x i1> %mask)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %gepbs = getelementptr i8, <16 x i8*> %base, i32 %off
- %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef)
- call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %resbs, <16 x i8*> %gepbs, i32 2, <16 x i1> %mask)
+ %gepbs = getelementptr i8, <16 x ptr> %base, i32 %off
+ %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef)
+ call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %resbs, <16 x ptr> %gepbs, i32 2, <16 x i1> %mask)
ret void
}
-declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
-declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
-
-declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
-declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>)
-declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
-declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>)
-
-declare <16 x half> @llvm.masked.gather.v16f16.v16p0f16(<16 x half*>, i32, <16 x i1>, <16 x half>)
-declare <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*>, i32, <8 x i1>, <8 x half>)
-declare <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*>, i32, <4 x i1>, <4 x half>)
-declare <2 x half> @llvm.masked.gather.v2f16.v2p0f16(<2 x half*>, i32, <2 x i1>, <2 x half>)
-
-declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
-declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
-
-declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
-declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)
-declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
-declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
-declare <4 x i32*> @llvm.masked.gather.v4p0i32.v4p0p0i32(<4 x i32**>, i32, <4 x i1>, <4 x i32*>)
-
-declare <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*>, i32, <16 x i1>, <16 x i16>)
-declare <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*>, i32, <8 x i1>, <8 x i16>)
-declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>)
-declare <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*>, i32, <2 x i1>, <2 x i16>)
-
-declare <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*>, i32, <32 x i1>, <32 x i8>)
-declare <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>)
-declare <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*>, i32, <8 x i1>, <8 x i8>)
-declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>)
-declare <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*>, i32, <2 x i1>, <2 x i8>)
-
-declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32, <2 x i1>)
-
-declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float>, <2 x float*>, i32, <2 x i1>)
-
-declare void @llvm.masked.scatter.v16f16.v16p0f16(<16 x half>, <16 x half*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half>, <8 x half*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half>, <4 x half*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half>, <2 x half*>, i32, <2 x i1>)
-
-declare void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64>, <4 x i64*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64>, <2 x i64*>, i32, <2 x i1>)
-
-declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>, <16 x i32*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32>, <2 x i32*>, i32, <2 x i1>)
-
-declare void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16>, <16 x i16*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16>, <8 x i16*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16>, <2 x i16*>, i32, <2 x i1>)
-
-declare void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8>, <32 x i8*>, i32, <32 x i1>)
-declare void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8>, <16 x i8*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8>, <8 x i8*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8>, <4 x i8*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8>, <2 x i8*>, i32, <2 x i1>)
+declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>)
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>)
+
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>)
+declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>)
+declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)
+declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>)
+
+declare <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x half>)
+declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>)
+declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
+declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>)
+
+declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
+declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
+
+declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>)
+declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
+declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
+declare <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x ptr>)
+
+declare <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i16>)
+declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
+declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
+declare <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i16>)
+
+declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>)
+declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
+declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
+declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
+declare <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i8>)
+
+declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>)
+
+declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>)
+
+declare void @llvm.masked.scatter.v16f16.v16p0(<16 x half>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, i32, <2 x i1>)
+
+declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>)
+
+declare void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>)
+
+declare void @llvm.masked.scatter.v16i16.v16p0(<16 x i16>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i16.v2p0(<2 x i16>, <2 x ptr>, i32, <2 x i1>)
+
+declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>)
+declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i8.v2p0(<2 x i8>, <2 x ptr>, i32, <2 x i1>)
%T432 = type <4 x i32>
%T464 = type <4 x i64>
-define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
+define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'direct'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %v0, %v1
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T432, %T432* %loadaddr
+ %v0 = load %T432, ptr %loadaddr
; ASM: vld1.64
- %v1 = load %T432, %T432* %loadaddr2
+ %v1 = load %T432, ptr %loadaddr2
; ASM: vld1.64
%r3 = shl %T432 %v0, %v1
; ASM: vshl.i32
- store %T432 %r3, %T432* %storeaddr
+ store %T432 %r3, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'ups1632'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32>
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32>
; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %r1, %r2
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T416, %T416* %loadaddr
+ %v0 = load %T416, ptr %loadaddr
; ASM: vldr
- %v1 = load %T416, %T416* %loadaddr2
+ %v1 = load %T416, ptr %loadaddr2
; ASM: vldr
%r1 = sext %T416 %v0 to %T432
%r2 = sext %T416 %v1 to %T432
%r3 = shl %T432 %r1, %r2
; ASM: vshll.s16
- store %T432 %r3, %T432* %storeaddr
+ store %T432 %r3, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'upu1632'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32>
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32>
; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %r1, %r2
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T416, %T416* %loadaddr
+ %v0 = load %T416, ptr %loadaddr
; ASM: vldr
- %v1 = load %T416, %T416* %loadaddr2
+ %v1 = load %T416, ptr %loadaddr2
; ASM: vldr
%r1 = zext %T416 %v0 to %T432
%r2 = zext %T416 %v1 to %T432
%r3 = shl %T432 %r1, %r2
; ASM: vshll.u16
- store %T432 %r3, %T432* %storeaddr
+ store %T432 %r3, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'ups3264'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <2 x i32> %v0, %v1
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64>
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T232, %T232* %loadaddr
+ %v0 = load %T232, ptr %loadaddr
; ASM: vldr
- %v1 = load %T232, %T232* %loadaddr2
+ %v1 = load %T232, ptr %loadaddr2
; ASM: vldr
%r3 = shl %T232 %v0, %v1
; ASM: vshl.i32
%st = sext %T232 %r3 to %T264
; ASM: vmovl.s32
- store %T264 %st, %T264* %storeaddr
+ store %T264 %st, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'upu3264'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <2 x i32> %v0, %v1
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64>
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T232, %T232* %loadaddr
+ %v0 = load %T232, ptr %loadaddr
; ASM: vldr
- %v1 = load %T232, %T232* %loadaddr2
+ %v1 = load %T232, ptr %loadaddr2
; ASM: vldr
%r3 = shl %T232 %v0, %v1
; ASM: vshl.i32
%st = zext %T232 %r3 to %T264
; ASM: vmovl.u32
- store %T264 %st, %T264* %storeaddr
+ store %T264 %st, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
+define void @dn3216(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'dn3216'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %v0, %v1
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16>
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, <4 x i16>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T432, %T432* %loadaddr
+ %v0 = load %T432, ptr %loadaddr
; ASM: vld1.64
- %v1 = load %T432, %T432* %loadaddr2
+ %v1 = load %T432, ptr %loadaddr2
; ASM: vld1.64
%r3 = shl %T432 %v0, %v1
; ASM: vshl.i32
%st = trunc %T432 %r3 to %T416
; ASM: vmovn.i32
- store %T416 %st, %T416* %storeaddr
+ store %T416 %st, ptr %storeaddr
; ASM: vstr
ret void
}
%T432 = type <4 x i32>
%T464 = type <4 x i64>
-define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
+define void @direct(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'direct'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %v0, %v1
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T432, %T432* %loadaddr
+ %v0 = load %T432, ptr %loadaddr
; ASM: vld1.64
- %v1 = load %T432, %T432* %loadaddr2
+ %v1 = load %T432, ptr %loadaddr2
; ASM: vld1.64
%r3 = sub %T432 %v0, %v1
; ASM: vsub.i32
- store %T432 %r3, %T432* %storeaddr
+ store %T432 %r3, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+define void @ups1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'ups1632'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32>
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32>
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %r1, %r2
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T416, %T416* %loadaddr
+ %v0 = load %T416, ptr %loadaddr
; ASM: vldr
- %v1 = load %T416, %T416* %loadaddr2
+ %v1 = load %T416, ptr %loadaddr2
; ASM: vldr
%r1 = sext %T416 %v0 to %T432
%r2 = sext %T416 %v1 to %T432
%r3 = sub %T432 %r1, %r2
; ASM: vsubl.s16
- store %T432 %r3, %T432* %storeaddr
+ store %T432 %r3, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
+define void @upu1632(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'upu1632'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32>
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32>
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %r1, %r2
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T416, %T416* %loadaddr
+ %v0 = load %T416, ptr %loadaddr
; ASM: vldr
- %v1 = load %T416, %T416* %loadaddr2
+ %v1 = load %T416, ptr %loadaddr2
; ASM: vldr
%r1 = zext %T416 %v0 to %T432
%r2 = zext %T416 %v1 to %T432
%r3 = sub %T432 %r1, %r2
; ASM: vsubl.u16
- store %T432 %r3, %T432* %storeaddr
+ store %T432 %r3, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+define void @ups3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'ups3264'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <2 x i32> %v0, %v1
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64>
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T232, %T232* %loadaddr
+ %v0 = load %T232, ptr %loadaddr
; ASM: vldr
- %v1 = load %T232, %T232* %loadaddr2
+ %v1 = load %T232, ptr %loadaddr2
; ASM: vldr
%r3 = sub %T232 %v0, %v1
; ASM: vsub.i32
%st = sext %T232 %r3 to %T264
; ASM: vmovl.s32
- store %T264 %st, %T264* %storeaddr
+ store %T264 %st, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
+define void @upu3264(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'upu3264'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <2 x i32> %v0, %v1
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64>
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T232, %T232* %loadaddr
+ %v0 = load %T232, ptr %loadaddr
; ASM: vldr
- %v1 = load %T232, %T232* %loadaddr2
+ %v1 = load %T232, ptr %loadaddr2
; ASM: vldr
%r3 = sub %T232 %v0, %v1
; ASM: vsub.i32
%st = zext %T232 %r3 to %T264
; ASM: vmovl.u32
- store %T264 %st, %T264* %storeaddr
+ store %T264 %st, ptr %storeaddr
; ASM: vst1.64
ret void
}
-define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
+define void @dn3216(ptr %loadaddr, ptr %loadaddr2, ptr %storeaddr) {
; COST-LABEL: 'dn3216'
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, ptr %loadaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, ptr %loadaddr2, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %v0, %v1
; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16>
-; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, <4 x i16>* %storeaddr, align 8
+; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, ptr %storeaddr, align 8
; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %v0 = load %T432, %T432* %loadaddr
+ %v0 = load %T432, ptr %loadaddr
; ASM: vld1.64
- %v1 = load %T432, %T432* %loadaddr2
+ %v1 = load %T432, ptr %loadaddr2
; ASM: vld1.64
%r3 = sub %T432 %v0, %v1
; ASM: vsub.i32
%st = trunc %T432 %r3 to %T416
; ASM: vmovn.i32
- store %T416 %st, %T416* %storeaddr
+ store %T416 %st, ptr %storeaddr
; ASM: vstr
ret void
}
define void @intrinsics() {
; CHECK-THUMB2-RECIP-LABEL: 'intrinsics'
; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call i32 @llvm.arm.ssat(i32 undef, i32 undef)
-; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0f16(half* undef)
+; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0(ptr undef)
; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 undef, i32 undef, i32 undef, i32 48)
; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> undef, <8 x i16> undef)
; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-THUMB2-LAT-LABEL: 'intrinsics'
; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call i32 @llvm.arm.ssat(i32 undef, i32 undef)
-; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0f16(half* undef)
+; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0(ptr undef)
; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 undef, i32 undef, i32 undef, i32 48)
; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> undef, <8 x i16> undef)
; CHECK-THUMB2-LAT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-THUMB2-SIZE-LABEL: 'intrinsics'
; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call i32 @llvm.arm.ssat(i32 undef, i32 undef)
-; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0f16(half* undef)
+; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0(ptr undef)
; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 undef, i32 undef, i32 undef, i32 48)
; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> undef, <8 x i16> undef)
; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%t1 = call i32 @llvm.arm.ssat(i32 undef, i32 undef)
- %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0f16(half* undef)
+ %t2 = tail call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0(ptr undef)
%t3 = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 undef, i32 undef, i32 undef, i32 48)
%t4 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> undef, <8 x i16> undef)
ret void
}
declare i32 @llvm.arm.ssat(i32, i32)
-declare { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0f16(half*)
+declare { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0(ptr)
declare { i32, i32 } @llvm.arm.mve.sqrshrl(i32, i32, i32, i32)
declare { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32, i32, i32, i32, i32, <8 x i16>, <8 x i16>)
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK
; Check that cost is 1 for unusual load to register sized load.
-define i32 @loadUnusualIntegerWithTrunc(i128* %ptr) {
+define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, i128* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
%trunc = trunc i128 %out to i32
ret i32 %trunc
}
-define i128 @loadUnusualInteger(i128* %ptr) {
+define i128 @loadUnusualInteger(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualInteger'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, i128* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
ret i128 %out
}
define i32 @stores(i32 %arg) {
; CHECK: cost of 1 {{.*}} store
- store i8 undef, i8* undef, align 4
+ store i8 undef, ptr undef, align 4
; CHECK: cost of 1 {{.*}} store
- store i16 undef, i16* undef, align 4
+ store i16 undef, ptr undef, align 4
; CHECK: cost of 1 {{.*}} store
- store i32 undef, i32* undef, align 4
+ store i32 undef, ptr undef, align 4
; CHECK: cost of 2 {{.*}} store
- store i64 undef, i64* undef, align 4
+ store i64 undef, ptr undef, align 4
; CHECK: cost of 4 {{.*}} store
- store i128 undef, i128* undef, align 4
+ store i128 undef, ptr undef, align 4
ret i32 undef
}
define i32 @loads(i32 %arg) {
; CHECK: cost of 1 {{.*}} load
- load i8, i8* undef, align 4
+ load i8, ptr undef, align 4
; CHECK: cost of 1 {{.*}} load
- load i16, i16* undef, align 4
+ load i16, ptr undef, align 4
; CHECK: cost of 1 {{.*}} load
- load i32, i32* undef, align 4
+ load i32, ptr undef, align 4
; CHECK: cost of 2 {{.*}} load
- load i64, i64* undef, align 4
+ load i64, ptr undef, align 4
; CHECK: cost of 4 {{.*}} load
- load i128, i128* undef, align 4
+ load i128, ptr undef, align 4
; FIXME: There actually are sub-vector Altivec loads, and so we could handle
; this with a small expense, but we don't currently.
; CHECK: cost of 42 {{.*}} load
- load <4 x i16>, <4 x i16>* undef, align 2
+ load <4 x i16>, ptr undef, align 2
; CHECK: cost of 2 {{.*}} load
- load <4 x i32>, <4 x i32>* undef, align 4
+ load <4 x i32>, ptr undef, align 4
; CHECK: cost of 46 {{.*}} load
- load <3 x float>, <3 x float>* undef, align 1
+ load <3 x float>, ptr undef, align 1
ret i32 undef
}
define void @matrix() {
; CHECK-LABEL: 'matrix'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %matrix1 = call <1 x i32> @llvm.matrix.column.major.load.v1i32.i64(i32* nonnull align 4 undef, i64 1, i1 false, i32 1, i32 1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %matrix1 = call <1 x i32> @llvm.matrix.column.major.load.v1i32.i64(ptr nonnull align 4 undef, i64 1, i1 false, i32 1, i32 1)
; CHECK-NEXT: Cost Model: Found an estimated cost of 452 for instruction: %0 = call <10 x i32> @llvm.matrix.multiply.v10i32.v10i32.v1i32(<10 x i32> undef, <1 x i32> %matrix1, i32 10, i32 1, i32 1)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
entry:
- %matrix1 = call <1 x i32> @llvm.matrix.column.major.load.v1i32.i64(i32* nonnull align 4 undef, i64 1, i1 false, i32 1, i32 1)
+ %matrix1 = call <1 x i32> @llvm.matrix.column.major.load.v1i32.i64(ptr nonnull align 4 undef, i64 1, i1 false, i32 1, i32 1)
%0 = call <10 x i32> @llvm.matrix.multiply.v10i32.v10i32.v1i32(<10 x i32> undef, <1 x i32> %matrix1, i32 10, i32 1, i32 1)
ret void
}
-declare <1 x i32> @llvm.matrix.column.major.load.v1i32.i64(i32* nocapture, i64, i1 immarg, i32 immarg, i32 immarg) #2
+declare <1 x i32> @llvm.matrix.column.major.load.v1i32.i64(ptr nocapture, i64, i1 immarg, i32 immarg, i32 immarg) #2
declare <10 x i32> @llvm.matrix.multiply.v10i32.v10i32.v1i32(<10 x i32>, <1 x i32>, i32 immarg, i32 immarg, i32 immarg) #3
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
-define <16 x i8> @test_l_v16i8(<16 x i8>* %p) #0 {
+define <16 x i8> @test_l_v16i8(ptr %p) #0 {
entry:
- %r = load <16 x i8>, <16 x i8>* %p, align 1
+ %r = load <16 x i8>, ptr %p, align 1
ret <16 x i8> %r
; CHECK-LABEL: test_l_v16i8
-; CHECK: cost of 2 for instruction: %r = load <16 x i8>, <16 x i8>* %p, align 1
+; CHECK: cost of 2 for instruction: %r = load <16 x i8>, ptr %p, align 1
}
-define <32 x i8> @test_l_v32i8(<32 x i8>* %p) #0 {
+define <32 x i8> @test_l_v32i8(ptr %p) #0 {
entry:
- %r = load <32 x i8>, <32 x i8>* %p, align 1
+ %r = load <32 x i8>, ptr %p, align 1
ret <32 x i8> %r
; CHECK-LABEL: test_l_v32i8
-; CHECK: cost of 4 for instruction: %r = load <32 x i8>, <32 x i8>* %p, align 1
+; CHECK: cost of 4 for instruction: %r = load <32 x i8>, ptr %p, align 1
}
-define <8 x i16> @test_l_v8i16(<8 x i16>* %p) #0 {
+define <8 x i16> @test_l_v8i16(ptr %p) #0 {
entry:
- %r = load <8 x i16>, <8 x i16>* %p, align 2
+ %r = load <8 x i16>, ptr %p, align 2
ret <8 x i16> %r
; CHECK-LABEL: test_l_v8i16
-; CHECK: cost of 2 for instruction: %r = load <8 x i16>, <8 x i16>* %p, align 2
+; CHECK: cost of 2 for instruction: %r = load <8 x i16>, ptr %p, align 2
}
-define <16 x i16> @test_l_v16i16(<16 x i16>* %p) #0 {
+define <16 x i16> @test_l_v16i16(ptr %p) #0 {
entry:
- %r = load <16 x i16>, <16 x i16>* %p, align 2
+ %r = load <16 x i16>, ptr %p, align 2
ret <16 x i16> %r
; CHECK-LABEL: test_l_v16i16
-; CHECK: cost of 4 for instruction: %r = load <16 x i16>, <16 x i16>* %p, align 2
+; CHECK: cost of 4 for instruction: %r = load <16 x i16>, ptr %p, align 2
}
-define <4 x i32> @test_l_v4i32(<4 x i32>* %p) #0 {
+define <4 x i32> @test_l_v4i32(ptr %p) #0 {
entry:
- %r = load <4 x i32>, <4 x i32>* %p, align 4
+ %r = load <4 x i32>, ptr %p, align 4
ret <4 x i32> %r
; CHECK-LABEL: test_l_v4i32
-; CHECK: cost of 2 for instruction: %r = load <4 x i32>, <4 x i32>* %p, align 4
+; CHECK: cost of 2 for instruction: %r = load <4 x i32>, ptr %p, align 4
}
-define <8 x i32> @test_l_v8i32(<8 x i32>* %p) #0 {
+define <8 x i32> @test_l_v8i32(ptr %p) #0 {
entry:
- %r = load <8 x i32>, <8 x i32>* %p, align 4
+ %r = load <8 x i32>, ptr %p, align 4
ret <8 x i32> %r
; CHECK-LABEL: test_l_v8i32
-; CHECK: cost of 4 for instruction: %r = load <8 x i32>, <8 x i32>* %p, align 4
+; CHECK: cost of 4 for instruction: %r = load <8 x i32>, ptr %p, align 4
}
-define <2 x i64> @test_l_v2i64(<2 x i64>* %p) #0 {
+define <2 x i64> @test_l_v2i64(ptr %p) #0 {
entry:
- %r = load <2 x i64>, <2 x i64>* %p, align 8
+ %r = load <2 x i64>, ptr %p, align 8
ret <2 x i64> %r
; CHECK-LABEL: test_l_v2i64
-; CHECK: cost of 1 for instruction: %r = load <2 x i64>, <2 x i64>* %p, align 8
+; CHECK: cost of 1 for instruction: %r = load <2 x i64>, ptr %p, align 8
}
-define <4 x i64> @test_l_v4i64(<4 x i64>* %p) #0 {
+define <4 x i64> @test_l_v4i64(ptr %p) #0 {
entry:
- %r = load <4 x i64>, <4 x i64>* %p, align 8
+ %r = load <4 x i64>, ptr %p, align 8
ret <4 x i64> %r
; CHECK-LABEL: test_l_v4i64
-; CHECK: cost of 2 for instruction: %r = load <4 x i64>, <4 x i64>* %p, align 8
+; CHECK: cost of 2 for instruction: %r = load <4 x i64>, ptr %p, align 8
}
-define <4 x float> @test_l_v4float(<4 x float>* %p) #0 {
+define <4 x float> @test_l_v4float(ptr %p) #0 {
entry:
- %r = load <4 x float>, <4 x float>* %p, align 4
+ %r = load <4 x float>, ptr %p, align 4
ret <4 x float> %r
; CHECK-LABEL: test_l_v4float
-; CHECK: cost of 2 for instruction: %r = load <4 x float>, <4 x float>* %p, align 4
+; CHECK: cost of 2 for instruction: %r = load <4 x float>, ptr %p, align 4
}
-define <8 x float> @test_l_v8float(<8 x float>* %p) #0 {
+define <8 x float> @test_l_v8float(ptr %p) #0 {
entry:
- %r = load <8 x float>, <8 x float>* %p, align 4
+ %r = load <8 x float>, ptr %p, align 4
ret <8 x float> %r
; CHECK-LABEL: test_l_v8float
-; CHECK: cost of 4 for instruction: %r = load <8 x float>, <8 x float>* %p, align 4
+; CHECK: cost of 4 for instruction: %r = load <8 x float>, ptr %p, align 4
}
-define <2 x double> @test_l_v2double(<2 x double>* %p) #0 {
+define <2 x double> @test_l_v2double(ptr %p) #0 {
entry:
- %r = load <2 x double>, <2 x double>* %p, align 8
+ %r = load <2 x double>, ptr %p, align 8
ret <2 x double> %r
; CHECK-LABEL: test_l_v2double
-; CHECK: cost of 1 for instruction: %r = load <2 x double>, <2 x double>* %p, align 8
+; CHECK: cost of 1 for instruction: %r = load <2 x double>, ptr %p, align 8
}
-define <4 x double> @test_l_v4double(<4 x double>* %p) #0 {
+define <4 x double> @test_l_v4double(ptr %p) #0 {
entry:
- %r = load <4 x double>, <4 x double>* %p, align 8
+ %r = load <4 x double>, ptr %p, align 8
ret <4 x double> %r
; CHECK-LABEL: test_l_v4double
-; CHECK: cost of 2 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
+; CHECK: cost of 2 for instruction: %r = load <4 x double>, ptr %p, align 8
}
-define <16 x i8> @test_l_p8v16i8(<16 x i8>* %p) #2 {
+define <16 x i8> @test_l_p8v16i8(ptr %p) #2 {
entry:
- %r = load <16 x i8>, <16 x i8>* %p, align 1
+ %r = load <16 x i8>, ptr %p, align 1
ret <16 x i8> %r
; CHECK-LABEL: test_l_p8v16i8
-; CHECK: cost of 1 for instruction: %r = load <16 x i8>, <16 x i8>* %p, align 1
+; CHECK: cost of 1 for instruction: %r = load <16 x i8>, ptr %p, align 1
}
-define <32 x i8> @test_l_p8v32i8(<32 x i8>* %p) #2 {
+define <32 x i8> @test_l_p8v32i8(ptr %p) #2 {
entry:
- %r = load <32 x i8>, <32 x i8>* %p, align 1
+ %r = load <32 x i8>, ptr %p, align 1
ret <32 x i8> %r
; CHECK-LABEL: test_l_p8v32i8
-; CHECK: cost of 2 for instruction: %r = load <32 x i8>, <32 x i8>* %p, align 1
+; CHECK: cost of 2 for instruction: %r = load <32 x i8>, ptr %p, align 1
}
-define <8 x i16> @test_l_p8v8i16(<8 x i16>* %p) #2 {
+define <8 x i16> @test_l_p8v8i16(ptr %p) #2 {
entry:
- %r = load <8 x i16>, <8 x i16>* %p, align 2
+ %r = load <8 x i16>, ptr %p, align 2
ret <8 x i16> %r
; CHECK-LABEL: test_l_p8v8i16
-; CHECK: cost of 1 for instruction: %r = load <8 x i16>, <8 x i16>* %p, align 2
+; CHECK: cost of 1 for instruction: %r = load <8 x i16>, ptr %p, align 2
}
-define <16 x i16> @test_l_p8v16i16(<16 x i16>* %p) #2 {
+define <16 x i16> @test_l_p8v16i16(ptr %p) #2 {
entry:
- %r = load <16 x i16>, <16 x i16>* %p, align 2
+ %r = load <16 x i16>, ptr %p, align 2
ret <16 x i16> %r
; CHECK-LABEL: test_l_p8v16i16
-; CHECK: cost of 2 for instruction: %r = load <16 x i16>, <16 x i16>* %p, align 2
+; CHECK: cost of 2 for instruction: %r = load <16 x i16>, ptr %p, align 2
}
-define <4 x i32> @test_l_p8v4i32(<4 x i32>* %p) #2 {
+define <4 x i32> @test_l_p8v4i32(ptr %p) #2 {
entry:
- %r = load <4 x i32>, <4 x i32>* %p, align 4
+ %r = load <4 x i32>, ptr %p, align 4
ret <4 x i32> %r
; CHECK-LABEL: test_l_p8v4i32
-; CHECK: cost of 1 for instruction: %r = load <4 x i32>, <4 x i32>* %p, align 4
+; CHECK: cost of 1 for instruction: %r = load <4 x i32>, ptr %p, align 4
}
-define <8 x i32> @test_l_p8v8i32(<8 x i32>* %p) #2 {
+define <8 x i32> @test_l_p8v8i32(ptr %p) #2 {
entry:
- %r = load <8 x i32>, <8 x i32>* %p, align 4
+ %r = load <8 x i32>, ptr %p, align 4
ret <8 x i32> %r
; CHECK-LABEL: test_l_p8v8i32
-; CHECK: cost of 2 for instruction: %r = load <8 x i32>, <8 x i32>* %p, align 4
+; CHECK: cost of 2 for instruction: %r = load <8 x i32>, ptr %p, align 4
}
-define <2 x i64> @test_l_p8v2i64(<2 x i64>* %p) #2 {
+define <2 x i64> @test_l_p8v2i64(ptr %p) #2 {
entry:
- %r = load <2 x i64>, <2 x i64>* %p, align 8
+ %r = load <2 x i64>, ptr %p, align 8
ret <2 x i64> %r
; CHECK-LABEL: test_l_p8v2i64
-; CHECK: cost of 1 for instruction: %r = load <2 x i64>, <2 x i64>* %p, align 8
+; CHECK: cost of 1 for instruction: %r = load <2 x i64>, ptr %p, align 8
}
-define <4 x i64> @test_l_p8v4i64(<4 x i64>* %p) #2 {
+define <4 x i64> @test_l_p8v4i64(ptr %p) #2 {
entry:
- %r = load <4 x i64>, <4 x i64>* %p, align 8
+ %r = load <4 x i64>, ptr %p, align 8
ret <4 x i64> %r
; CHECK-LABEL: test_l_p8v4i64
-; CHECK: cost of 2 for instruction: %r = load <4 x i64>, <4 x i64>* %p, align 8
+; CHECK: cost of 2 for instruction: %r = load <4 x i64>, ptr %p, align 8
}
-define <4 x float> @test_l_p8v4float(<4 x float>* %p) #2 {
+define <4 x float> @test_l_p8v4float(ptr %p) #2 {
entry:
- %r = load <4 x float>, <4 x float>* %p, align 4
+ %r = load <4 x float>, ptr %p, align 4
ret <4 x float> %r
; CHECK-LABEL: test_l_p8v4float
-; CHECK: cost of 1 for instruction: %r = load <4 x float>, <4 x float>* %p, align 4
+; CHECK: cost of 1 for instruction: %r = load <4 x float>, ptr %p, align 4
}
-define <8 x float> @test_l_p8v8float(<8 x float>* %p) #2 {
+define <8 x float> @test_l_p8v8float(ptr %p) #2 {
entry:
- %r = load <8 x float>, <8 x float>* %p, align 4
+ %r = load <8 x float>, ptr %p, align 4
ret <8 x float> %r
; CHECK-LABEL: test_l_p8v8float
-; CHECK: cost of 2 for instruction: %r = load <8 x float>, <8 x float>* %p, align 4
+; CHECK: cost of 2 for instruction: %r = load <8 x float>, ptr %p, align 4
}
-define <2 x double> @test_l_p8v2double(<2 x double>* %p) #2 {
+define <2 x double> @test_l_p8v2double(ptr %p) #2 {
entry:
- %r = load <2 x double>, <2 x double>* %p, align 8
+ %r = load <2 x double>, ptr %p, align 8
ret <2 x double> %r
; CHECK-LABEL: test_l_p8v2double
-; CHECK: cost of 1 for instruction: %r = load <2 x double>, <2 x double>* %p, align 8
+; CHECK: cost of 1 for instruction: %r = load <2 x double>, ptr %p, align 8
}
-define <4 x double> @test_l_p8v4double(<4 x double>* %p) #2 {
+define <4 x double> @test_l_p8v4double(ptr %p) #2 {
entry:
- %r = load <4 x double>, <4 x double>* %p, align 8
+ %r = load <4 x double>, ptr %p, align 8
ret <4 x double> %r
; CHECK-LABEL: test_l_p8v4double
-; CHECK: cost of 2 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
+; CHECK: cost of 2 for instruction: %r = load <4 x double>, ptr %p, align 8
}
-define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
+define void @test_s_v16i8(ptr %p, <16 x i8> %v) #0 {
entry:
- store <16 x i8> %v, <16 x i8>* %p, align 1
+ store <16 x i8> %v, ptr %p, align 1
ret void
; CHECK-LABEL: test_s_v16i8
-; CHECK: cost of 1 for instruction: store <16 x i8> %v, <16 x i8>* %p, align 1
+; CHECK: cost of 1 for instruction: store <16 x i8> %v, ptr %p, align 1
}
-define void @test_s_v32i8(<32 x i8>* %p, <32 x i8> %v) #0 {
+define void @test_s_v32i8(ptr %p, <32 x i8> %v) #0 {
entry:
- store <32 x i8> %v, <32 x i8>* %p, align 1
+ store <32 x i8> %v, ptr %p, align 1
ret void
; CHECK-LABEL: test_s_v32i8
-; CHECK: cost of 2 for instruction: store <32 x i8> %v, <32 x i8>* %p, align 1
+; CHECK: cost of 2 for instruction: store <32 x i8> %v, ptr %p, align 1
}
-define void @test_s_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
+define void @test_s_v8i16(ptr %p, <8 x i16> %v) #0 {
entry:
- store <8 x i16> %v, <8 x i16>* %p, align 2
+ store <8 x i16> %v, ptr %p, align 2
ret void
; CHECK-LABEL: test_s_v8i16
-; CHECK: cost of 1 for instruction: store <8 x i16> %v, <8 x i16>* %p, align 2
+; CHECK: cost of 1 for instruction: store <8 x i16> %v, ptr %p, align 2
}
-define void @test_s_v16i16(<16 x i16>* %p, <16 x i16> %v) #0 {
+define void @test_s_v16i16(ptr %p, <16 x i16> %v) #0 {
entry:
- store <16 x i16> %v, <16 x i16>* %p, align 2
+ store <16 x i16> %v, ptr %p, align 2
ret void
; CHECK-LABEL: test_s_v16i16
-; CHECK: cost of 2 for instruction: store <16 x i16> %v, <16 x i16>* %p, align 2
+; CHECK: cost of 2 for instruction: store <16 x i16> %v, ptr %p, align 2
}
-define void @test_s_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
+define void @test_s_v4i32(ptr %p, <4 x i32> %v) #0 {
entry:
- store <4 x i32> %v, <4 x i32>* %p, align 4
+ store <4 x i32> %v, ptr %p, align 4
ret void
; CHECK-LABEL: test_s_v4i32
-; CHECK: cost of 1 for instruction: store <4 x i32> %v, <4 x i32>* %p, align 4
+; CHECK: cost of 1 for instruction: store <4 x i32> %v, ptr %p, align 4
}
-define void @test_s_v8i32(<8 x i32>* %p, <8 x i32> %v) #0 {
+define void @test_s_v8i32(ptr %p, <8 x i32> %v) #0 {
entry:
- store <8 x i32> %v, <8 x i32>* %p, align 4
+ store <8 x i32> %v, ptr %p, align 4
ret void
; CHECK-LABEL: test_s_v8i32
-; CHECK: cost of 2 for instruction: store <8 x i32> %v, <8 x i32>* %p, align 4
+; CHECK: cost of 2 for instruction: store <8 x i32> %v, ptr %p, align 4
}
-define void @test_s_v2i64(<2 x i64>* %p, <2 x i64> %v) #0 {
+define void @test_s_v2i64(ptr %p, <2 x i64> %v) #0 {
entry:
- store <2 x i64> %v, <2 x i64>* %p, align 8
+ store <2 x i64> %v, ptr %p, align 8
ret void
; CHECK-LABEL: test_s_v2i64
-; CHECK: cost of 1 for instruction: store <2 x i64> %v, <2 x i64>* %p, align 8
+; CHECK: cost of 1 for instruction: store <2 x i64> %v, ptr %p, align 8
}
-define void @test_s_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
+define void @test_s_v4i64(ptr %p, <4 x i64> %v) #0 {
entry:
- store <4 x i64> %v, <4 x i64>* %p, align 8
+ store <4 x i64> %v, ptr %p, align 8
ret void
; CHECK-LABEL: test_s_v4i64
-; CHECK: cost of 2 for instruction: store <4 x i64> %v, <4 x i64>* %p, align 8
+; CHECK: cost of 2 for instruction: store <4 x i64> %v, ptr %p, align 8
}
-define void @test_s_v4float(<4 x float>* %p, <4 x float> %v) #0 {
+define void @test_s_v4float(ptr %p, <4 x float> %v) #0 {
entry:
- store <4 x float> %v, <4 x float>* %p, align 4
+ store <4 x float> %v, ptr %p, align 4
ret void
; CHECK-LABEL: test_s_v4float
-; CHECK: cost of 1 for instruction: store <4 x float> %v, <4 x float>* %p, align 4
+; CHECK: cost of 1 for instruction: store <4 x float> %v, ptr %p, align 4
}
-define void @test_s_v8float(<8 x float>* %p, <8 x float> %v) #0 {
+define void @test_s_v8float(ptr %p, <8 x float> %v) #0 {
entry:
- store <8 x float> %v, <8 x float>* %p, align 4
+ store <8 x float> %v, ptr %p, align 4
ret void
; CHECK-LABEL: test_s_v8float
-; CHECK: cost of 2 for instruction: store <8 x float> %v, <8 x float>* %p, align 4
+; CHECK: cost of 2 for instruction: store <8 x float> %v, ptr %p, align 4
}
-define void @test_s_v2double(<2 x double>* %p, <2 x double> %v) #0 {
+define void @test_s_v2double(ptr %p, <2 x double> %v) #0 {
entry:
- store <2 x double> %v, <2 x double>* %p, align 8
+ store <2 x double> %v, ptr %p, align 8
ret void
; CHECK-LABEL: test_s_v2double
-; CHECK: cost of 1 for instruction: store <2 x double> %v, <2 x double>* %p, align 8
+; CHECK: cost of 1 for instruction: store <2 x double> %v, ptr %p, align 8
}
-define void @test_s_v4double(<4 x double>* %p, <4 x double> %v) #0 {
+define void @test_s_v4double(ptr %p, <4 x double> %v) #0 {
entry:
- store <4 x double> %v, <4 x double>* %p, align 8
+ store <4 x double> %v, ptr %p, align 8
ret void
; CHECK-LABEL: test_s_v4double
-; CHECK: cost of 2 for instruction: store <4 x double> %v, <4 x double>* %p, align 8
+; CHECK: cost of 2 for instruction: store <4 x double> %v, ptr %p, align 8
}
attributes #0 = { nounwind "target-cpu"="pwr7" }
define i32 @test(i32 %arg) {
; CHECK: cost of 1 {{.*}} load
- load i8, i8* undef, align 1
+ load i8, ptr undef, align 1
; CHECK: cost of 1 {{.*}} load
- load i16, i16* undef, align 1
+ load i16, ptr undef, align 1
; CHECK: cost of 1 {{.*}} load
- load i32, i32* undef, align 1
+ load i32, ptr undef, align 1
; CHECK: cost of 1 {{.*}} load
- load i64, i64* undef, align 1
+ load i64, ptr undef, align 1
; CHECK: cost of 1 {{.*}} store
- store i8 undef, i8* undef, align 1
+ store i8 undef, ptr undef, align 1
; CHECK: cost of 1 {{.*}} store
- store i16 undef, i16* undef, align 1
+ store i16 undef, ptr undef, align 1
; CHECK: cost of 1 {{.*}} store
- store i32 undef, i32* undef, align 1
+ store i32 undef, ptr undef, align 1
; CHECK: cost of 1 {{.*}} store
- store i64 undef, i64* undef, align 1
+ store i64 undef, ptr undef, align 1
ret i32 undef
}
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck %s
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -mattr=+vsx | FileCheck --check-prefix=CHECK-P9 %s
-define void @testi16(i16 %arg1, i16 %arg2, i16* %arg3) {
+define void @testi16(i16 %arg1, i16 %arg2, ptr %arg3) {
%s1 = add i16 %arg1, %arg2
%s2 = zext i16 %arg1 to i32
- %s3 = load i16, i16* %arg3
- store i16 %arg2, i16* %arg3
+ %s3 = load i16, ptr %arg3
+ store i16 %arg2, ptr %arg3
%c = icmp eq i16 %arg1, %arg2
ret void
; CHECK-P9: cost of 2 {{.*}} icmp
}
-define void @test4xi32(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32>* %arg3) {
+define void @test4xi32(<4 x i32> %arg1, <4 x i32> %arg2, ptr %arg3) {
- %v1 = load <4 x i32>, <4 x i32>* %arg3
- store <4 x i32> %arg2, <4 x i32>* %arg3
+ %v1 = load <4 x i32>, ptr %arg3
+ store <4 x i32> %arg2, ptr %arg3
ret void
; CHECK: cost of 1 {{.*}} load
define i32 @loads(i32 %arg) {
; CHECK: cost of 1 {{.*}} load
- load <4 x i8>, <4 x i8>* undef, align 1
+ load <4 x i8>, ptr undef, align 1
; CHECK: cost of 1 {{.*}} load
- load <8 x i8>, <8 x i8>* undef, align 1
+ load <8 x i8>, ptr undef, align 1
; CHECK: cost of 1 {{.*}} load
- load <2 x i16>, <2 x i16>* undef, align 2
+ load <2 x i16>, ptr undef, align 2
; CHECK: cost of 1 {{.*}} load
- load <4 x i16>, <4 x i16>* undef, align 2
+ load <4 x i16>, ptr undef, align 2
ret i32 undef
}
define i32 @masked_gather() {
; CHECK-LABEL: 'masked_gather'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 8, <8 x i1> undef, <8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 8, <4 x i1> undef, <4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 8, <2 x i1> undef, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 8, <1 x i1> undef, <1 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 4, <16 x i1> undef, <16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 4, <8 x i1> undef, <8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 4, <4 x i1> undef, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 4, <2 x i1> undef, <2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = call <1 x float> @llvm.masked.gather.v1f32.v1p0f32(<1 x float*> undef, i32 4, <1 x i1> undef, <1 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32F16 = call <32 x half> @llvm.masked.gather.v32f16.v32p0f16(<32 x half*> undef, i32 2, <32 x i1> undef, <32 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0f16(<16 x half*> undef, i32 2, <16 x i1> undef, <16 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> undef, i32 2, <8 x i1> undef, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*> undef, i32 2, <4 x i1> undef, <4 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0f16(<2 x half*> undef, i32 2, <2 x i1> undef, <2 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.masked.gather.v1f16.v1p0f16(<1 x half*> undef, i32 2, <1 x i1> undef, <1 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 8, <8 x i1> undef, <8 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 8, <4 x i1> undef, <4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 8, <2 x i1> undef, <2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 8, <1 x i1> undef, <1 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 4, <16 x i1> undef, <16 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 4, <8 x i1> undef, <8 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 4, <4 x i1> undef, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 4, <2 x i1> undef, <2 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = call <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*> undef, i32 4, <1 x i1> undef, <1 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 2, <32 x i1> undef, <32 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 2, <16 x i1> undef, <16 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 2, <8 x i1> undef, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 2, <4 x i1> undef, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> undef, i32 2, <2 x i1> undef, <2 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I16 = call <1 x i16> @llvm.masked.gather.v1i16.v1p0i16(<1 x i16*> undef, i32 2, <1 x i1> undef, <1 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I8 = call <1 x i8> @llvm.masked.gather.v1i8.v1p0i8(<1 x i8*> undef, i32 1, <1 x i1> undef, <1 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64.u = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 2, <8 x i1> undef, <8 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64.u = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 2, <4 x i1> undef, <4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64.u = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 2, <2 x i1> undef, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64.u = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 2, <1 x i1> undef, <1 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32.u = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 2, <16 x i1> undef, <16 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F32.u = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 2, <8 x i1> undef, <8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32.u = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 2, <4 x i1> undef, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32.u = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 2, <2 x i1> undef, <2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32.u = call <1 x float> @llvm.masked.gather.v1f32.v1p0f32(<1 x float*> undef, i32 2, <1 x i1> undef, <1 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32F16.u = call <32 x half> @llvm.masked.gather.v32f16.v32p0f16(<32 x half*> undef, i32 1, <32 x i1> undef, <32 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F16.u = call <16 x half> @llvm.masked.gather.v16f16.v16p0f16(<16 x half*> undef, i32 1, <16 x i1> undef, <16 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F16.u = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> undef, i32 1, <8 x i1> undef, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F16.u = call <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*> undef, i32 1, <4 x i1> undef, <4 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F16.u = call <2 x half> @llvm.masked.gather.v2f16.v2p0f16(<2 x half*> undef, i32 1, <2 x i1> undef, <2 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16.u = call <1 x half> @llvm.masked.gather.v1f16.v1p0f16(<1 x half*> undef, i32 1, <1 x i1> undef, <1 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64.u = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 4, <8 x i1> undef, <8 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64.u = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64.u = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 4, <2 x i1> undef, <2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64.u = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 4, <1 x i1> undef, <1 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32.u = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32.u = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32.u = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32.u = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32.u = call <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*> undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16.u = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16.u = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16.u = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I16.u = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16.u = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> undef, i32 1, <2 x i1> undef, <2 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I16.u = call <1 x i16> @llvm.masked.gather.v1i16.v1p0i16(<1 x i16*> undef, i32 1, <1 x i1> undef, <1 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 8, <8 x i1> undef, <8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 8, <4 x i1> undef, <4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 8, <2 x i1> undef, <2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 8, <1 x i1> undef, <1 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F32 = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> undef, i32 4, <1 x i1> undef, <1 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32F16 = call <32 x half> @llvm.masked.gather.v32f16.v32p0(<32 x ptr> undef, i32 2, <32 x i1> undef, <32 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F16 = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 8, <8 x i1> undef, <8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 8, <4 x i1> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 8, <2 x i1> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 8, <1 x i1> undef, <1 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I32 = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> undef, i32 4, <1 x i1> undef, <1 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 2, <32 x i1> undef, <32 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I16 = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I8 = call <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64.u = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64.u = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64.u = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64.u = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32.u = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F32.u = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32.u = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32.u = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32.u = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32F16.u = call <32 x half> @llvm.masked.gather.v32f16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F16.u = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F16.u = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F16.u = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F16.u = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16.u = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64.u = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64.u = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64.u = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64.u = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 4, <1 x i1> undef, <1 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32.u = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32.u = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32.u = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32.u = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32.u = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16.u = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16.u = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16.u = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I16.u = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16.u = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I16.u = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
;
- %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 8, <8 x i1> undef, <8 x double> undef)
- %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 8, <4 x i1> undef, <4 x double> undef)
- %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 8, <2 x i1> undef, <2 x double> undef)
- %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 8, <1 x i1> undef, <1 x double> undef)
-
- %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 4, <16 x i1> undef, <16 x float> undef)
- %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 4, <8 x i1> undef, <8 x float> undef)
- %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 4, <4 x i1> undef, <4 x float> undef)
- %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 4, <2 x i1> undef, <2 x float> undef)
- %V1F32 = call <1 x float> @llvm.masked.gather.v1f32.v1p0f32(<1 x float*> undef, i32 4, <1 x i1> undef, <1 x float> undef)
-
- %V32F16 = call <32 x half> @llvm.masked.gather.v32f16.v32p0f16(<32 x half*> undef, i32 2, <32 x i1> undef, <32 x half> undef)
- %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0f16(<16 x half*> undef, i32 2, <16 x i1> undef, <16 x half> undef)
- %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> undef, i32 2, <8 x i1> undef, <8 x half> undef)
- %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*> undef, i32 2, <4 x i1> undef, <4 x half> undef)
- %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0f16(<2 x half*> undef, i32 2, <2 x i1> undef, <2 x half> undef)
- %V1F16 = call <1 x half> @llvm.masked.gather.v1f16.v1p0f16(<1 x half*> undef, i32 2, <1 x i1> undef, <1 x half> undef)
-
- %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 8, <8 x i1> undef, <8 x i64> undef)
- %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 8, <4 x i1> undef, <4 x i64> undef)
- %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 8, <2 x i1> undef, <2 x i64> undef)
- %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 8, <1 x i1> undef, <1 x i64> undef)
-
- %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 4, <16 x i1> undef, <16 x i32> undef)
- %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 4, <8 x i1> undef, <8 x i32> undef)
- %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 4, <4 x i1> undef, <4 x i32> undef)
- %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 4, <2 x i1> undef, <2 x i32> undef)
- %V1I32 = call <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*> undef, i32 4, <1 x i1> undef, <1 x i32> undef)
-
- %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 2, <32 x i1> undef, <32 x i16> undef)
- %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 2, <16 x i1> undef, <16 x i16> undef)
- %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 2, <8 x i1> undef, <8 x i16> undef)
- %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 2, <4 x i1> undef, <4 x i16> undef)
- %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> undef, i32 2, <2 x i1> undef, <2 x i16> undef)
- %V1I16 = call <1 x i16> @llvm.masked.gather.v1i16.v1p0i16(<1 x i16*> undef, i32 2, <1 x i1> undef, <1 x i16> undef)
-
- %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
- %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
- %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
- %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
- %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
- %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
- %V1I8 = call <1 x i8> @llvm.masked.gather.v1i8.v1p0i8(<1 x i8*> undef, i32 1, <1 x i1> undef, <1 x i8> undef)
+ %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 8, <8 x i1> undef, <8 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 8, <4 x i1> undef, <4 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 8, <2 x i1> undef, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 8, <1 x i1> undef, <1 x double> undef)
+
+ %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x float> undef)
+ %V1F32 = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> undef, i32 4, <1 x i1> undef, <1 x float> undef)
+
+ %V32F16 = call <32 x half> @llvm.masked.gather.v32f16.v32p0(<32 x ptr> undef, i32 2, <32 x i1> undef, <32 x half> undef)
+ %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x half> undef)
+ %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x half> undef)
+ %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x half> undef)
+ %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x half> undef)
+ %V1F16 = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x half> undef)
+
+ %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 8, <8 x i1> undef, <8 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 8, <4 x i1> undef, <4 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 8, <2 x i1> undef, <2 x i64> undef)
+ %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 8, <1 x i1> undef, <1 x i64> undef)
+
+ %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 4, <16 x i1> undef, <16 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i32> undef)
+ %V1I32 = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> undef, i32 4, <1 x i1> undef, <1 x i32> undef)
+
+ %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 2, <32 x i1> undef, <32 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x i16> undef)
+ %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x i16> undef)
+ %V1I16 = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x i16> undef)
+
+ %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+ %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i8> undef)
+ %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i8> undef)
+ %V1I8 = call <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i8> undef)
; Test unaligned gathers
- %V8F64.u = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 2, <8 x i1> undef, <8 x double> undef)
- %V4F64.u = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 2, <4 x i1> undef, <4 x double> undef)
- %V2F64.u = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 2, <2 x i1> undef, <2 x double> undef)
- %V1F64.u = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 2, <1 x i1> undef, <1 x double> undef)
-
- %V16F32.u = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 2, <16 x i1> undef, <16 x float> undef)
- %V8F32.u = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 2, <8 x i1> undef, <8 x float> undef)
- %V4F32.u = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 2, <4 x i1> undef, <4 x float> undef)
- %V2F32.u = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 2, <2 x i1> undef, <2 x float> undef)
- %V1F32.u = call <1 x float> @llvm.masked.gather.v1f32.v1p0f32(<1 x float*> undef, i32 2, <1 x i1> undef, <1 x float> undef)
-
- %V32F16.u = call <32 x half> @llvm.masked.gather.v32f16.v32p0f16(<32 x half*> undef, i32 1, <32 x i1> undef, <32 x half> undef)
- %V16F16.u = call <16 x half> @llvm.masked.gather.v16f16.v16p0f16(<16 x half*> undef, i32 1, <16 x i1> undef, <16 x half> undef)
- %V8F16.u = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> undef, i32 1, <8 x i1> undef, <8 x half> undef)
- %V4F16.u = call <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*> undef, i32 1, <4 x i1> undef, <4 x half> undef)
- %V2F16.u = call <2 x half> @llvm.masked.gather.v2f16.v2p0f16(<2 x half*> undef, i32 1, <2 x i1> undef, <2 x half> undef)
- %V1F16.u = call <1 x half> @llvm.masked.gather.v1f16.v1p0f16(<1 x half*> undef, i32 1, <1 x i1> undef, <1 x half> undef)
-
- %V8I64.u = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 4, <8 x i1> undef, <8 x i64> undef)
- %V4I64.u = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
- %V2I64.u = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 4, <2 x i1> undef, <2 x i64> undef)
- %V1I64.u = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 4, <1 x i1> undef, <1 x i64> undef)
-
- %V16I32.u = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
- %V8I32.u = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
- %V4I32.u = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
- %V2I32.u = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
- %V1I32.u = call <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*> undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-
- %V32I16.u = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
- %V16I16.u = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
- %V8I16.u = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
- %V4I16.u = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
- %V2I16.u = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> undef, i32 1, <2 x i1> undef, <2 x i16> undef)
- %V1I16.u = call <1 x i16> @llvm.masked.gather.v1i16.v1p0i16(<1 x i16*> undef, i32 1, <1 x i1> undef, <1 x i16> undef)
+ %V8F64.u = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x double> undef)
+ %V4F64.u = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x double> undef)
+ %V2F64.u = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x double> undef)
+ %V1F64.u = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x double> undef)
+
+ %V16F32.u = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x float> undef)
+ %V8F32.u = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x float> undef)
+ %V4F32.u = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x float> undef)
+ %V2F32.u = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x float> undef)
+ %V1F32.u = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x float> undef)
+
+ %V32F16.u = call <32 x half> @llvm.masked.gather.v32f16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x half> undef)
+ %V16F16.u = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x half> undef)
+ %V8F16.u = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x half> undef)
+ %V4F16.u = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x half> undef)
+ %V2F16.u = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x half> undef)
+ %V1F16.u = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x half> undef)
+
+ %V8I64.u = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x i64> undef)
+ %V4I64.u = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i64> undef)
+ %V2I64.u = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i64> undef)
+ %V1I64.u = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 4, <1 x i1> undef, <1 x i64> undef)
+
+ %V16I32.u = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+ %V8I32.u = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+ %V4I32.u = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+ %V2I32.u = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+ %V1I32.u = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+
+ %V32I16.u = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+ %V16I16.u = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+ %V8I16.u = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+ %V4I16.u = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+ %V2I16.u = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i16> undef)
+ %V1I16.u = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i16> undef)
ret i32 0
}
-declare <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*>, i32, <8 x i1>, <8 x double>)
-declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
-declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
-declare <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*>, i32, <1 x i1>, <1 x double>)
-
-declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
-declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>)
-declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
-declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>)
-declare <1 x float> @llvm.masked.gather.v1f32.v1p0f32(<1 x float*>, i32, <1 x i1>, <1 x float>)
-
-declare <32 x half> @llvm.masked.gather.v32f16.v32p0f16(<32 x half*>, i32, <32 x i1>, <32 x half>)
-declare <16 x half> @llvm.masked.gather.v16f16.v16p0f16(<16 x half*>, i32, <16 x i1>, <16 x half>)
-declare <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*>, i32, <8 x i1>, <8 x half>)
-declare <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*>, i32, <4 x i1>, <4 x half>)
-declare <2 x half> @llvm.masked.gather.v2f16.v2p0f16(<2 x half*>, i32, <2 x i1>, <2 x half>)
-declare <1 x half> @llvm.masked.gather.v1f16.v1p0f16(<1 x half*>, i32, <1 x i1>, <1 x half>)
-
-declare <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>)
-declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
-declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
-declare <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*>, i32, <1 x i1>, <1 x i64>)
-
-declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
-declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)
-declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
-declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
-declare <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*>, i32, <1 x i1>, <1 x i32>)
-
-declare <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*>, i32, <32 x i1>, <32 x i16>)
-declare <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*>, i32, <16 x i1>, <16 x i16>)
-declare <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*>, i32, <8 x i1>, <8 x i16>)
-declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>)
-declare <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*>, i32, <2 x i1>, <2 x i16>)
-declare <1 x i16> @llvm.masked.gather.v1i16.v1p0i16(<1 x i16*>, i32, <1 x i1>, <1 x i16>)
-
-declare <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*>, i32, <64 x i1>, <64 x i8>)
-declare <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*>, i32, <32 x i1>, <32 x i8>)
-declare <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>)
-declare <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*>, i32, <8 x i1>, <8 x i8>)
-declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>)
-declare <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*>, i32, <2 x i1>, <2 x i8>)
-declare <1 x i8> @llvm.masked.gather.v1i8.v1p0i8(<1 x i8*>, i32, <1 x i1>, <1 x i8>)
+declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>)
+declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>)
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>)
+declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>)
+
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>)
+declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>)
+declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)
+declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>)
+declare <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x float>)
+
+declare <32 x half> @llvm.masked.gather.v32f16.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x half>)
+declare <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x half>)
+declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>)
+declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
+declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>)
+declare <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x half>)
+
+declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
+declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
+declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
+declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>)
+
+declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>)
+declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
+declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
+declare <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i32>)
+
+declare <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i16>)
+declare <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i16>)
+declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
+declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
+declare <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i16>)
+declare <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i16>)
+
+declare <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr>, i32, <64 x i1>, <64 x i8>)
+declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>)
+declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
+declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
+declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
+declare <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i8>)
+declare <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i8>)
define i32 @masked_scatter() {
; CHECK-LABEL: 'masked_scatter'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 8, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 8, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 8, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 8, <1 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 4, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 4, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f32.v1p0f32(<1 x float> undef, <1 x float*> undef, i32 4, <1 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v32f16.v32p0f16(<32 x half> undef, <32 x half*> undef, i32 2, <32 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16f16.v16p0f16(<16 x half> undef, <16 x half*> undef, i32 2, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> undef, <8 x half*> undef, i32 2, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half> undef, <4 x half*> undef, i32 2, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half> undef, <2 x half*> undef, i32 2, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f16.v1p0f16(<1 x half> undef, <1 x half*> undef, i32 2, <1 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 8, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 8, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 8, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 8, <1 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 4, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 4, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 4, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 4, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32> undef, <1 x i32*> undef, i32 4, <1 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 2, <32 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 2, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 2, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 2, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> undef, <2 x i16*> undef, i32 2, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i16.v1p0i16(<1 x i16> undef, <1 x i16*> undef, i32 2, <1 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> undef, <4 x i8*> undef, i32 1, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> undef, <2 x i8*> undef, i32 1, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i8.v1p0i8(<1 x i8> undef, <1 x i8*> undef, i32 1, <1 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 2, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 2, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 2, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 2, <1 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 2, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 2, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 2, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 2, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f32.v1p0f32(<1 x float> undef, <1 x float*> undef, i32 2, <1 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32f16.v32p0f16(<32 x half> undef, <32 x half*> undef, i32 1, <32 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16f16.v16p0f16(<16 x half> undef, <16 x half*> undef, i32 1, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> undef, <8 x half*> undef, i32 1, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half> undef, <4 x half*> undef, i32 1, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half> undef, <2 x half*> undef, i32 1, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f16.v1p0f16(<1 x half> undef, <1 x half*> undef, i32 1, <1 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32> undef, <1 x i32*> undef, i32 1, <1 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> undef, <2 x i16*> undef, i32 1, <2 x i1> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i16.v1p0i16(<1 x i16> undef, <1 x i16*> undef, i32 1, <1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 8, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 8, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 8, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 8, <1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> undef, <1 x ptr> undef, i32 4, <1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v32f16.v32p0(<32 x half> undef, <32 x ptr> undef, i32 2, <32 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> undef, <1 x ptr> undef, i32 2, <1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 8, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 8, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 8, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 8, <1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i32.v1p0(<1 x i32> undef, <1 x ptr> undef, i32 4, <1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 2, <32 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i16.v1p0(<1 x i16> undef, <1 x ptr> undef, i32 2, <1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i8.v1p0(<1 x i8> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 2, <1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> undef, <1 x ptr> undef, i32 2, <1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32f16.v32p0(<32 x half> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i32.v1p0(<1 x i32> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i16.v1p0(<1 x i16> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
;
- call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 8, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 8, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 8, <2 x i1> undef)
- call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 8, <1 x i1> undef)
-
- call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 4, <16 x i1> undef)
- call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 4, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 4, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 4, <2 x i1> undef)
- call void @llvm.masked.scatter.v1f32.v1p0f32(<1 x float> undef, <1 x float*> undef, i32 4, <1 x i1> undef)
-
- call void @llvm.masked.scatter.v32f16.v32p0f16(<32 x half> undef, <32 x half*> undef, i32 2, <32 x i1> undef)
- call void @llvm.masked.scatter.v16f16.v16p0f16(<16 x half> undef, <16 x half*> undef, i32 2, <16 x i1> undef)
- call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> undef, <8 x half*> undef, i32 2, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half> undef, <4 x half*> undef, i32 2, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half> undef, <2 x half*> undef, i32 2, <2 x i1> undef)
- call void @llvm.masked.scatter.v1f16.v1p0f16(<1 x half> undef, <1 x half*> undef, i32 2, <1 x i1> undef)
-
- call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 8, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 8, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 8, <2 x i1> undef)
- call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 8, <1 x i1> undef)
-
- call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 4, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 4, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 4, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 4, <2 x i1> undef)
- call void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32> undef, <1 x i32*> undef, i32 4, <1 x i1> undef)
-
- call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 2, <32 x i1> undef)
- call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 2, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 2, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 2, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> undef, <2 x i16*> undef, i32 2, <2 x i1> undef)
- call void @llvm.masked.scatter.v1i16.v1p0i16(<1 x i16> undef, <1 x i16*> undef, i32 2, <1 x i1> undef)
-
- call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
- call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> undef, <4 x i8*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> undef, <2 x i8*> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v1i8.v1p0i8(<1 x i8> undef, <1 x i8*> undef, i32 1, <1 x i1> undef)
+ call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 8, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 8, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 8, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 8, <1 x i1> undef)
+
+ call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> undef, <1 x ptr> undef, i32 4, <1 x i1> undef)
+
+ call void @llvm.masked.scatter.v32f16.v32p0(<32 x half> undef, <32 x ptr> undef, i32 2, <32 x i1> undef)
+ call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> undef, <1 x ptr> undef, i32 2, <1 x i1> undef)
+
+ call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 8, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 8, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 8, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 8, <1 x i1> undef)
+
+ call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 4, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 4, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 4, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 4, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1i32.v1p0(<1 x i32> undef, <1 x ptr> undef, i32 4, <1 x i1> undef)
+
+ call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 2, <32 x i1> undef)
+ call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1i16.v1p0(<1 x i16> undef, <1 x ptr> undef, i32 2, <1 x i1> undef)
+
+ call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+ call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+ call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1i8.v1p0(<1 x i8> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
; Test unaligned scatters
- call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 2, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 2, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 2, <2 x i1> undef)
- call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 2, <1 x i1> undef)
-
- call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 2, <16 x i1> undef)
- call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 2, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 2, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 2, <2 x i1> undef)
- call void @llvm.masked.scatter.v1f32.v1p0f32(<1 x float> undef, <1 x float*> undef, i32 2, <1 x i1> undef)
-
- call void @llvm.masked.scatter.v32f16.v32p0f16(<32 x half> undef, <32 x half*> undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.scatter.v16f16.v16p0f16(<16 x half> undef, <16 x half*> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> undef, <8 x half*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half> undef, <4 x half*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half> undef, <2 x half*> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v1f16.v1p0f16(<1 x half> undef, <1 x half*> undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32> undef, <1 x i32*> undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> undef, <2 x i16*> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v1i16.v1p0i16(<1 x i16> undef, <1 x i16*> undef, i32 1, <1 x i1> undef)
+ call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 2, <1 x i1> undef)
+
+ call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 2, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 2, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 2, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 2, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> undef, <1 x ptr> undef, i32 2, <1 x i1> undef)
+
+ call void @llvm.masked.scatter.v32f16.v32p0(<32 x half> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+ call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+
+ call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+
+ call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1i32.v1p0(<1 x i32> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+
+ call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+ call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1i16.v1p0(<1 x i16> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
ret i32 0
}
-declare void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double>, <8 x double*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double>, <1 x double*>, i32, <1 x i1>)
-
-declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float>, <2 x float*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v1f32.v1p0f32(<1 x float>, <1 x float*>, i32, <1 x i1>)
-
-declare void @llvm.masked.scatter.v32f16.v32p0f16(<32 x half>, <32 x half*>, i32, <32 x i1>)
-declare void @llvm.masked.scatter.v16f16.v16p0f16(<16 x half>, <16 x half*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half>, <8 x half*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half>, <4 x half*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half>, <2 x half*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v1f16.v1p0f16(<1 x half>, <1 x half*>, i32, <1 x i1>)
-
-declare void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64>, <8 x i64*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64>, <4 x i64*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64>, <2 x i64*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64>, <1 x i64*>, i32, <1 x i1>)
-
-declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>, <16 x i32*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32>, <2 x i32*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32>, <1 x i32*>, i32, <1 x i1>)
-
-declare void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16>, <32 x i16*>, i32, <32 x i1>)
-declare void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16>, <16 x i16*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16>, <8 x i16*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16>, <2 x i16*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v1i16.v1p0i16(<1 x i16>, <1 x i16*>, i32, <1 x i1>)
-
-declare void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8>, <64 x i8*>, i32, <64 x i1>)
-declare void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8>, <32 x i8*>, i32, <32 x i1>)
-declare void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8>, <16 x i8*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8>, <8 x i8*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8>, <4 x i8*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8>, <2 x i8*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v1i8.v1p0i8(<1 x i8>, <1 x i8*>, i32, <1 x i1>)
+; Declarations of the llvm.masked.scatter overloads called by the test above,
+; grouped by element type (f64, f32, f16, i64, i32, i16, i8). The name suffix
+; spells the value vector type followed by the pointer vector type (for
+; example v8f64.v8p0). Operands are the value vector, the vector of
+; destination pointers, an i32 (the alignment operand, per the LLVM LangRef),
+; and the <N x i1> mask.
+declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>)
+
+declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v1f32.v1p0(<1 x float>, <1 x ptr>, i32, <1 x i1>)
+
+declare void @llvm.masked.scatter.v32f16.v32p0(<32 x half>, <32 x ptr>, i32, <32 x i1>)
+declare void @llvm.masked.scatter.v16f16.v16p0(<16 x half>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v1f16.v1p0(<1 x half>, <1 x ptr>, i32, <1 x i1>)
+
+declare void @llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>)
+
+declare void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v1i32.v1p0(<1 x i32>, <1 x ptr>, i32, <1 x i1>)
+
+declare void @llvm.masked.scatter.v32i16.v32p0(<32 x i16>, <32 x ptr>, i32, <32 x i1>)
+declare void @llvm.masked.scatter.v16i16.v16p0(<16 x i16>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i16.v2p0(<2 x i16>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v1i16.v1p0(<1 x i16>, <1 x ptr>, i32, <1 x i1>)
+
+declare void @llvm.masked.scatter.v64i8.v64p0(<64 x i8>, <64 x ptr>, i32, <64 x i1>)
+declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>)
+declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i8.v2p0(<2 x i8>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v1i8.v1p0(<1 x i8>, <1 x ptr>, i32, <1 x i1>)
; RUN: opt -mtriple=riscv64 -passes="print<cost-model>" 2>&1 -disable-output < %s \
; RUN: | FileCheck %s -check-prefix=RVI
-define void @testi8(i8* %a, i32 %i) {
+; Costs of i8 getelementptr instructions with constant offsets at and around
+; the [-2048, 2047] boundary, plus one variable index. The checks below expect
+; cost 0 for offsets 1, -1, 2047 and -2048, and cost 1 for 2048, -2049 and %i.
+; NOTE(review): [-2048, 2047] is the range of a signed 12-bit value, presumably
+; the RISC-V addressing immediate -- confirm against the target cost model.
+define void @testi8(ptr %a, i32 %i) {
; RVI-LABEL: 'testi8'
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i8, i8* %a, i32 -1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i8, i8* %a, i32 2047
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = getelementptr inbounds i8, i8* %a, i32 2048
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds i8, i8* %a, i32 -2048
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = getelementptr inbounds i8, i8* %a, i32 -2049
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, ptr %a, i32 1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i8, ptr %a, i32 -1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i8, ptr %a, i32 2047
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = getelementptr inbounds i8, ptr %a, i32 2048
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds i8, ptr %a, i32 -2048
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = getelementptr inbounds i8, ptr %a, i32 -2049
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i8, ptr %a, i32 %i
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %a0 = getelementptr inbounds i8, i8* %a, i32 0
- %a1 = getelementptr inbounds i8, i8* %a, i32 1
- %a2 = getelementptr inbounds i8, i8* %a, i32 -1
- %a3 = getelementptr inbounds i8, i8* %a, i32 2047
- %a4 = getelementptr inbounds i8, i8* %a, i32 2048
- %a5 = getelementptr inbounds i8, i8* %a, i32 -2048
- %a6 = getelementptr inbounds i8, i8* %a, i32 -2049
- %ai = getelementptr inbounds i8, i8* %a, i32 %i
+ %a1 = getelementptr inbounds i8, ptr %a, i32 1
+ %a2 = getelementptr inbounds i8, ptr %a, i32 -1
+ %a3 = getelementptr inbounds i8, ptr %a, i32 2047
+ %a4 = getelementptr inbounds i8, ptr %a, i32 2048
+ %a5 = getelementptr inbounds i8, ptr %a, i32 -2048
+ %a6 = getelementptr inbounds i8, ptr %a, i32 -2049
+ %ai = getelementptr inbounds i8, ptr %a, i32 %i
ret void
}
; testi16: cost-model expectations for getelementptr over i16 elements, using
; constant indices 1, -1, 1023, 1024, -1024, -1025 and the variable index %i.
; Per the RVI lines, indices 1, -1, 1023 and -1024 are costed 0, while 1024,
; -1025 and %i are costed 1.
; NOTE(review): with 2-byte elements the 0/1 boundary falls at byte offsets
; 2046/2048 and -2048/-2050, presumably the 12-bit signed immediate -- confirm.
; This change switches the typed pointer (i16*) to opaque `ptr` and drops the
; zero-index %a0 GEP together with its CHECK line.
-define void @testi16(i16* %a, i32 %i) {
+define void @testi16(ptr %a, i32 %i) {
; RVI-LABEL: 'testi16'
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i16, i16* %a, i32 -1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i16, i16* %a, i32 1023
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = getelementptr inbounds i16, i16* %a, i32 1024
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds i16, i16* %a, i32 -1024
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = getelementptr inbounds i16, i16* %a, i32 -1025
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, ptr %a, i32 1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i16, ptr %a, i32 -1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i16, ptr %a, i32 1023
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = getelementptr inbounds i16, ptr %a, i32 1024
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds i16, ptr %a, i32 -1024
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = getelementptr inbounds i16, ptr %a, i32 -1025
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, ptr %a, i32 %i
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %a0 = getelementptr inbounds i16, i16* %a, i32 0
- %a1 = getelementptr inbounds i16, i16* %a, i32 1
- %a2 = getelementptr inbounds i16, i16* %a, i32 -1
- %a3 = getelementptr inbounds i16, i16* %a, i32 1023
- %a4 = getelementptr inbounds i16, i16* %a, i32 1024
- %a5 = getelementptr inbounds i16, i16* %a, i32 -1024
- %a6 = getelementptr inbounds i16, i16* %a, i32 -1025
- %ai = getelementptr inbounds i16, i16* %a, i32 %i
+ %a1 = getelementptr inbounds i16, ptr %a, i32 1
+ %a2 = getelementptr inbounds i16, ptr %a, i32 -1
+ %a3 = getelementptr inbounds i16, ptr %a, i32 1023
+ %a4 = getelementptr inbounds i16, ptr %a, i32 1024
+ %a5 = getelementptr inbounds i16, ptr %a, i32 -1024
+ %a6 = getelementptr inbounds i16, ptr %a, i32 -1025
+ %ai = getelementptr inbounds i16, ptr %a, i32 %i
ret void
}
; testi32: cost-model expectations for getelementptr over i32 elements, using
; constant indices 1, -1, 511, 512, -512, -513 and the variable index %i.
; Per the RVI lines, indices 1, -1, 511 and -512 are costed 0, while 512, -513
; and %i are costed 1.
; NOTE(review): with 4-byte elements the 0/1 boundary falls at byte offsets
; 2044/2048 and -2048/-2052, presumably the 12-bit signed immediate -- confirm.
; This change switches the typed pointer (i32*) to opaque `ptr` and drops the
; zero-index %a0 GEP together with its CHECK line.
-define void @testi32(i32* %a, i32 %i) {
+define void @testi32(ptr %a, i32 %i) {
; RVI-LABEL: 'testi32'
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i32, i32* %a, i32 -1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i32, i32* %a, i32 511
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = getelementptr inbounds i32, i32* %a, i32 512
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds i32, i32* %a, i32 -512
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = getelementptr inbounds i32, i32* %a, i32 -513
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, ptr %a, i32 1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i32, ptr %a, i32 -1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i32, ptr %a, i32 511
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = getelementptr inbounds i32, ptr %a, i32 512
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds i32, ptr %a, i32 -512
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = getelementptr inbounds i32, ptr %a, i32 -513
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i32, ptr %a, i32 %i
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %a0 = getelementptr inbounds i32, i32* %a, i32 0
- %a1 = getelementptr inbounds i32, i32* %a, i32 1
- %a2 = getelementptr inbounds i32, i32* %a, i32 -1
- %a3 = getelementptr inbounds i32, i32* %a, i32 511
- %a4 = getelementptr inbounds i32, i32* %a, i32 512
- %a5 = getelementptr inbounds i32, i32* %a, i32 -512
- %a6 = getelementptr inbounds i32, i32* %a, i32 -513
- %ai = getelementptr inbounds i32, i32* %a, i32 %i
+ %a1 = getelementptr inbounds i32, ptr %a, i32 1
+ %a2 = getelementptr inbounds i32, ptr %a, i32 -1
+ %a3 = getelementptr inbounds i32, ptr %a, i32 511
+ %a4 = getelementptr inbounds i32, ptr %a, i32 512
+ %a5 = getelementptr inbounds i32, ptr %a, i32 -512
+ %a6 = getelementptr inbounds i32, ptr %a, i32 -513
+ %ai = getelementptr inbounds i32, ptr %a, i32 %i
ret void
}
; testi64: cost-model expectations for getelementptr over i64 elements, using
; constant indices 1, -1, 255, 256, -256, -257 and the variable index %i.
; Per the RVI lines, indices 1, -1, 255 and -256 are costed 0, while 256, -257
; and %i are costed 1.
; NOTE(review): with 8-byte elements the 0/1 boundary falls at byte offsets
; 2040/2048 and -2048/-2056, presumably the 12-bit signed immediate -- confirm.
; This change switches the typed pointer (i64*) to opaque `ptr` and drops the
; zero-index %a0 GEP together with its CHECK line.
-define void @testi64(i64* %a, i32 %i) {
+define void @testi64(ptr %a, i32 %i) {
; RVI-LABEL: 'testi64'
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i64, i64* %a, i32 -1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i64, i64* %a, i32 255
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = getelementptr inbounds i64, i64* %a, i32 256
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds i64, i64* %a, i32 -256
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = getelementptr inbounds i64, i64* %a, i32 -257
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, ptr %a, i32 1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i64, ptr %a, i32 -1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i64, ptr %a, i32 255
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = getelementptr inbounds i64, ptr %a, i32 256
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds i64, ptr %a, i32 -256
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = getelementptr inbounds i64, ptr %a, i32 -257
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, ptr %a, i32 %i
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %a0 = getelementptr inbounds i64, i64* %a, i32 0
- %a1 = getelementptr inbounds i64, i64* %a, i32 1
- %a2 = getelementptr inbounds i64, i64* %a, i32 -1
- %a3 = getelementptr inbounds i64, i64* %a, i32 255
- %a4 = getelementptr inbounds i64, i64* %a, i32 256
- %a5 = getelementptr inbounds i64, i64* %a, i32 -256
- %a6 = getelementptr inbounds i64, i64* %a, i32 -257
- %ai = getelementptr inbounds i64, i64* %a, i32 %i
+ %a1 = getelementptr inbounds i64, ptr %a, i32 1
+ %a2 = getelementptr inbounds i64, ptr %a, i32 -1
+ %a3 = getelementptr inbounds i64, ptr %a, i32 255
+ %a4 = getelementptr inbounds i64, ptr %a, i32 256
+ %a5 = getelementptr inbounds i64, ptr %a, i32 -256
+ %a6 = getelementptr inbounds i64, ptr %a, i32 -257
+ %ai = getelementptr inbounds i64, ptr %a, i32 %i
ret void
}
; testfloat: cost-model expectations for getelementptr over float elements,
; using constant indices 1, -1, 511, 512, -512, -513 and the variable index %i.
; Per the RVI lines, indices 1, -1, 511 and -512 are costed 0, while 512, -513
; and %i are costed 1 (the same index thresholds as the i32 test in this file).
; This change switches the typed pointer (float*) to opaque `ptr` and drops the
; zero-index %a0 GEP together with its CHECK line.
-define void @testfloat(float* %a, i32 %i) {
+define void @testfloat(ptr %a, i32 %i) {
; RVI-LABEL: 'testfloat'
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, float* %a, i32 1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds float, float* %a, i32 -1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds float, float* %a, i32 511
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = getelementptr inbounds float, float* %a, i32 512
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds float, float* %a, i32 -512
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = getelementptr inbounds float, float* %a, i32 -513
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds float, ptr %a, i32 1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds float, ptr %a, i32 -1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds float, ptr %a, i32 511
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = getelementptr inbounds float, ptr %a, i32 512
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds float, ptr %a, i32 -512
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = getelementptr inbounds float, ptr %a, i32 -513
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, ptr %a, i32 %i
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %a0 = getelementptr inbounds float, float* %a, i32 0
- %a1 = getelementptr inbounds float, float* %a, i32 1
- %a2 = getelementptr inbounds float, float* %a, i32 -1
- %a3 = getelementptr inbounds float, float* %a, i32 511
- %a4 = getelementptr inbounds float, float* %a, i32 512
- %a5 = getelementptr inbounds float, float* %a, i32 -512
- %a6 = getelementptr inbounds float, float* %a, i32 -513
- %ai = getelementptr inbounds float, float* %a, i32 %i
+ %a1 = getelementptr inbounds float, ptr %a, i32 1
+ %a2 = getelementptr inbounds float, ptr %a, i32 -1
+ %a3 = getelementptr inbounds float, ptr %a, i32 511
+ %a4 = getelementptr inbounds float, ptr %a, i32 512
+ %a5 = getelementptr inbounds float, ptr %a, i32 -512
+ %a6 = getelementptr inbounds float, ptr %a, i32 -513
+ %ai = getelementptr inbounds float, ptr %a, i32 %i
ret void
}
; testdouble: cost-model expectations for getelementptr over double elements,
; using constant indices 1, -1, 255, 256, -256, -257 and the variable index %i.
; Per the RVI lines, indices 1, -1, 255 and -256 are costed 0, while 256, -257
; and %i are costed 1 (the same index thresholds as the i64 test in this file).
; This change switches the typed pointer (double*) to opaque `ptr` and drops
; the zero-index %a0 GEP together with its CHECK line.
-define void @testdouble(double* %a, i32 %i) {
+define void @testdouble(ptr %a, i32 %i) {
; RVI-LABEL: 'testdouble'
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds double, double* %a, i32 -1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds double, double* %a, i32 255
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = getelementptr inbounds double, double* %a, i32 256
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds double, double* %a, i32 -256
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = getelementptr inbounds double, double* %a, i32 -257
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, ptr %a, i32 1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds double, ptr %a, i32 -1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds double, ptr %a, i32 255
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4 = getelementptr inbounds double, ptr %a, i32 256
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds double, ptr %a, i32 -256
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a6 = getelementptr inbounds double, ptr %a, i32 -257
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, ptr %a, i32 %i
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %a0 = getelementptr inbounds double, double* %a, i32 0
- %a1 = getelementptr inbounds double, double* %a, i32 1
- %a2 = getelementptr inbounds double, double* %a, i32 -1
- %a3 = getelementptr inbounds double, double* %a, i32 255
- %a4 = getelementptr inbounds double, double* %a, i32 256
- %a5 = getelementptr inbounds double, double* %a, i32 -256
- %a6 = getelementptr inbounds double, double* %a, i32 -257
- %ai = getelementptr inbounds double, double* %a, i32 %i
+ %a1 = getelementptr inbounds double, ptr %a, i32 1
+ %a2 = getelementptr inbounds double, ptr %a, i32 -1
+ %a3 = getelementptr inbounds double, ptr %a, i32 255
+ %a4 = getelementptr inbounds double, ptr %a, i32 256
+ %a5 = getelementptr inbounds double, ptr %a, i32 -256
+ %a6 = getelementptr inbounds double, ptr %a, i32 -257
+ %ai = getelementptr inbounds double, ptr %a, i32 %i
ret void
}
; testvecs: cost-model expectations for getelementptr whose source element type
; is a fixed <4 x T> vector, always on an undef base pointer.
; Per the RVI lines, index 1 is costed 0 for every element type; index 128 is
; costed 0 for <4 x i8> and <4 x i16> and 1 for <4 x i32>, <4 x i64>,
; <4 x float> and <4 x double>.
; This change switches typed vector pointers to opaque `ptr` and drops the
; zero-index GEPs %a0..%a5 together with their CHECK lines.
; NOTE(review): the %i parameter is not referenced by any instruction shown here.
define void @testvecs(i32 %i) {
; RVI-LABEL: 'testvecs'
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b0 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b1 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b2 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b3 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b4 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b5 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c1 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 128
-; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c2 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 128
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 128
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 128
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 128
-; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 128
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b0 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b1 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b2 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b3 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b4 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b5 = getelementptr inbounds <4 x double>, ptr undef, i32 1
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c1 = getelementptr inbounds <4 x i8>, ptr undef, i32 128
+; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c2 = getelementptr inbounds <4 x i16>, ptr undef, i32 128
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = getelementptr inbounds <4 x i32>, ptr undef, i32 128
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c4 = getelementptr inbounds <4 x i64>, ptr undef, i32 128
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = getelementptr inbounds <4 x float>, ptr undef, i32 128
+; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = getelementptr inbounds <4 x double>, ptr undef, i32 128
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %a0 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
- %a1 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
- %a2 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
- %a3 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
- %a4 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
- %a5 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
- %b0 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
- %b1 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
- %b2 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
- %b3 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
- %b4 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
- %b5 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
+ %b0 = getelementptr inbounds <4 x i8>, ptr undef, i32 1
+ %b1 = getelementptr inbounds <4 x i16>, ptr undef, i32 1
+ %b2 = getelementptr inbounds <4 x i32>, ptr undef, i32 1
+ %b3 = getelementptr inbounds <4 x i64>, ptr undef, i32 1
+ %b4 = getelementptr inbounds <4 x float>, ptr undef, i32 1
+ %b5 = getelementptr inbounds <4 x double>, ptr undef, i32 1
- %c1 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 128
- %c2 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 128
- %c3 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 128
- %c4 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 128
- %c5 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 128
- %c6 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 128
+ %c1 = getelementptr inbounds <4 x i8>, ptr undef, i32 128
+ %c2 = getelementptr inbounds <4 x i16>, ptr undef, i32 128
+ %c3 = getelementptr inbounds <4 x i32>, ptr undef, i32 128
+ %c4 = getelementptr inbounds <4 x i64>, ptr undef, i32 128
+ %c5 = getelementptr inbounds <4 x float>, ptr undef, i32 128
+ %c6 = getelementptr inbounds <4 x double>, ptr undef, i32 128
ret void
}
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=riscv64 < %s | FileCheck %s --check-prefix=CHECK
; Check that a load of an unusual integer width (i128) whose result is truncated to i32 is costed as 1, i.e. like a register-sized load.
; loadUnusualIntegerWithTrunc: loads an i128 through %ptr and returns it
; truncated to i32. The CHECK lines expect a cost of 1 each for the load, the
; trunc and the ret.
; NOTE(review): the preceding RUN line uses -cost-kind=code-size, so these are
; presumably code-size costs -- confirm that RUN line governs this function.
; This change only switches the typed pointer (i128*) to opaque `ptr`.
-define i32 @loadUnusualIntegerWithTrunc(i128* %ptr) {
+define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, i128* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %trunc = trunc i128 %out to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
%trunc = trunc i128 %out to i32
ret i32 %trunc
}
; loadUnusualInteger: loads an i128 through %ptr and returns the full value
; with no truncation. The CHECK lines expect a cost of 2 for the load (versus 1
; in the truncating variant of this test) and 1 for the ret.
; This change only switches the typed pointer (i128*) to opaque `ptr`.
-define i128 @loadUnusualInteger(i128* %ptr) {
+define i128 @loadUnusualInteger(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualInteger'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, i128* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
ret i128 %out
}
define void @fixed() {
; CHECK-LABEL: 'fixed'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 8, <2 x i1> undef, <2 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 8, <4 x i1> undef, <4 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 8, <8 x i1> undef, <8 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 8, <16 x i1> undef, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 8, <2 x i1> undef, <2 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 8, <4 x i1> undef, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 8, <8 x i1> undef, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 8, <2 x i1> undef, <2 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 8, <4 x i1> undef, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 8, <2 x i1> undef, <2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 8, <2 x i1> undef, <2 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 8, <4 x i1> undef, <4 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 8, <8 x i1> undef, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 8, <2 x i1> undef, <2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 8, <4 x i1> undef, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 8, <2 x i1> undef, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 8, <4 x i1> undef, <4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0v32f16(<32 x half>* undef, i32 8, <32 x i1> undef, <32 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
; fixed-width integer types
- %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8> *undef, i32 8, <2 x i1> undef, <2 x i8> undef)
- %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8> *undef, i32 8, <4 x i1> undef, <4 x i8> undef)
- %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8> *undef, i32 8, <8 x i1> undef, <8 x i8> undef)
- %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8> *undef, i32 8, <16 x i1> undef, <16 x i8> undef)
- %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16> *undef, i32 8, <2 x i1> undef, <2 x i16> undef)
- %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16> *undef, i32 8, <4 x i1> undef, <4 x i16> undef)
- %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16> *undef, i32 8, <8 x i1> undef, <8 x i16> undef)
- %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32> *undef, i32 8, <2 x i1> undef, <2 x i32> undef)
- %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32> *undef, i32 8, <4 x i1> undef, <4 x i32> undef)
- %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64> *undef, i32 8, <2 x i1> undef, <2 x i64> undef)
+ %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef)
+ %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef)
+ %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef)
+ %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef)
+ %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef)
+ %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef)
+ %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef)
+ %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
+ %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
+ %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
; fixed-width floating point types
- %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half> *undef, i32 8, <2 x i1> undef, <2 x half> undef)
- %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half> *undef, i32 8, <4 x i1> undef, <4 x half> undef)
- %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half> *undef, i32 8, <8 x i1> undef, <8 x half> undef)
- %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float> *undef, i32 8, <2 x i1> undef, <2 x float> undef)
- %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float> *undef, i32 8, <4 x i1> undef, <4 x float> undef)
- %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double> *undef, i32 8, <2 x i1> undef, <2 x double> undef)
+ %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
+ %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
+ %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
+ %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
+ %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
+ %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
- %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64> *undef, i32 8, <4 x i1> undef, <4 x i64> undef)
- %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0v32f16(<32 x half> *undef, i32 8, <32 x i1> undef, <32 x half> undef)
+ %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
+ %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
ret void
}
define void @scalable() {
; CHECK-LABEL: 'scalable'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0nxv2f16(<vscale x 2 x half>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0nxv4f16(<vscale x 4 x half>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64>* undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64>* undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half>* undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0(ptr undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
entry:
; scalable integer types
- %nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
- %nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
- %nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8> *undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
- %nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
- %nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
- %nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
- %nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16> *undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
- %nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
- %nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
- %nxv2i64 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+ %nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+ %nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+ %nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+ %nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+ %nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+ %nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+ %nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+ %nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+ %nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+ %nxv2i64 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
; scalable floating point types
- %nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0nxv2f16(<vscale x 2 x half> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
- %nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0nxv4f16(<vscale x 4 x half> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
- %nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half> *undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
- %nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0nxv2f32(<vscale x 2 x float> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
- %nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
- %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double> *undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+ %nxv2f16 = call <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+ %nxv4f16 = call <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+ %nxv8f16 = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+ %nxv2f32 = call <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+ %nxv4f32 = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+ %nxv2f64 = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
- %nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64> *undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
- %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64> *undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
- %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half> *undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+ %nxv1i64 = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+ %nxv4i64 = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+ %nxv32f16 = call <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0(ptr undef, i32 8, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
ret void
}
-declare <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>*, i32, <2 x i1>, <2 x i8>)
-declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>)
-declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
-declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
-declare <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>*, i32, <2 x i1>, <2 x i16>)
-declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>)
-declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
-declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
-declare <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>*, i32, <2 x i1>, <2 x i64>)
-declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>)
-declare <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>*, i32, <2 x i1>, <2 x half>)
-declare <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>*, i32, <4 x i1>, <4 x half>)
-declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>)
-declare <32 x half> @llvm.masked.load.v32f16.p0v32f16(<32 x half>*, i32, <32 x i1>, <32 x half>)
-declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
-declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
-declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
+declare <2 x i8> @llvm.masked.load.v2i8.p0(ptr, i32, <2 x i1>, <2 x i8>)
+declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32, <4 x i1>, <4 x i8>)
+declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>)
+declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>)
+declare <2 x i16> @llvm.masked.load.v2i16.p0(ptr, i32, <2 x i1>, <2 x i16>)
+declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>)
+declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>)
+declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>)
+declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>)
+declare <4 x i64> @llvm.masked.load.v4i64.p0(ptr, i32, <4 x i1>, <4 x i64>)
+declare <2 x half> @llvm.masked.load.v2f16.p0(ptr, i32, <2 x i1>, <2 x half>)
+declare <4 x half> @llvm.masked.load.v4f16.p0(ptr, i32, <4 x i1>, <4 x half>)
+declare <8 x half> @llvm.masked.load.v8f16.p0(ptr, i32, <8 x i1>, <8 x half>)
+declare <32 x half> @llvm.masked.load.v32f16.p0(ptr, i32, <32 x i1>, <32 x half>)
+declare <2 x float> @llvm.masked.load.v2f32.p0(ptr, i32, <2 x i1>, <2 x float>)
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)
+declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>)
-declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
-declare <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>*, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
-declare <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>*, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)
-declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
-declare <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>*, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
-declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
-declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0nxv4i64(<vscale x 4 x i64>*, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
-declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64>*, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
-declare <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0nxv2f16(<vscale x 2 x half>*, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
-declare <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0nxv4f16(<vscale x 4 x half>*, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
-declare <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>*, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
-declare <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0nxv32f16(<vscale x 32 x half>*, i32, <vscale x 32 x i1>, <vscale x 32 x half>)
-declare <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>*, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
-declare <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>*, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>*, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
+declare <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
+declare <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)
+declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
+declare <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
+declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
+declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
+declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
+declare <vscale x 2 x half> @llvm.masked.load.nxv2f16.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
+declare <vscale x 4 x half> @llvm.masked.load.nxv4f16.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
+declare <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 32 x half> @llvm.masked.load.nxv32f16.p0(ptr, i32, <vscale x 32 x i1>, <vscale x 32 x half>)
+declare <vscale x 2 x float> @llvm.masked.load.nxv2f32.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
+declare <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
define void @masked_gather_aligned() {
; GENERIC-LABEL: 'masked_gather_aligned'
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0f32(<vscale x 16 x float*> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0f16(<vscale x 32 x half*> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0f16(<vscale x 16 x half*> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0f16(<vscale x 2 x half*> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0f16(<vscale x 1 x half*> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0i32(<vscale x 16 x i32*> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0i32(<vscale x 1 x i32*> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0i16(<vscale x 32 x i16*> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0i16(<vscale x 16 x i16*> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0i8(<vscale x 64 x i8*> undef, i32 1, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0i8(<vscale x 16 x i8*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8*> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8*> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8*> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i8*> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; MAX256-LABEL: 'masked_gather_aligned'
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0f32(<vscale x 16 x float*> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0f16(<vscale x 32 x half*> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0f16(<vscale x 16 x half*> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0f16(<vscale x 2 x half*> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0f16(<vscale x 1 x half*> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0i32(<vscale x 16 x i32*> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0i32(<vscale x 1 x i32*> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0i16(<vscale x 32 x i16*> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0i16(<vscale x 16 x i16*> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0i8(<vscale x 64 x i8*> undef, i32 1, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0i8(<vscale x 16 x i8*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8*> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8*> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8*> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i8*> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; UNSUPPORTED-LABEL: 'masked_gather_aligned'
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0f32(<vscale x 16 x float*> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0f16(<vscale x 32 x half*> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0f16(<vscale x 16 x half*> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0f16(<vscale x 2 x half*> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0f16(<vscale x 1 x half*> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0i32(<vscale x 16 x i32*> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0i32(<vscale x 1 x i32*> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0i16(<vscale x 32 x i16*> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0i16(<vscale x 16 x i16*> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0i8(<vscale x 64 x i8*> undef, i32 1, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0i8(<vscale x 16 x i8*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8*> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8*> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8*> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i8*> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
; UNSUPPORTED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
- %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
- %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
- %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+ %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+ %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+ %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+ %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
- %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0f32(<vscale x 16 x float*> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
- %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
- %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
- %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
- %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+ %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+ %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+ %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+ %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+ %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
- %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0f16(<vscale x 32 x half*> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
- %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0f16(<vscale x 16 x half*> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
- %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
- %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
- %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0f16(<vscale x 2 x half*> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
- %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0f16(<vscale x 1 x half*> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
+ %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+ %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+ %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+ %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+ %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+ %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
- %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
- %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
- %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
- %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+ %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+ %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+ %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+ %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
- %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0i32(<vscale x 16 x i32*> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
- %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
- %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
- %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
- %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0i32(<vscale x 1 x i32*> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+ %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+ %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+ %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+ %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+ %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
- %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0i16(<vscale x 32 x i16*> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
- %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0i16(<vscale x 16 x i16*> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
- %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
- %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
- %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
- %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+ %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+ %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+ %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+ %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+ %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+ %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
- %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0i8(<vscale x 64 x i8*> undef, i32 1, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
- %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
- %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0i8(<vscale x 16 x i8*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
- %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
- %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
- %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
- %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+ %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
+ %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
+ %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+ %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+ %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+ %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+ %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
- %V8PTR = call <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8*> undef)
- %V4PTR = call <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8*> undef)
- %V2PTR = call <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8*> undef)
- %V1PTR= call <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i8*> undef)
+ %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
+ %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
+ %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
+ %V1PTR= call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
ret void
}
define void @masked_gather_unaligned() {
; CHECK-LABEL: 'masked_gather_unaligned'
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8F64.u = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4F64.u = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2F64.u = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F64.u = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V16F32.u = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0f32(<vscale x 16 x float*> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8F32.u = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4F32.u = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2F32.u = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F32.u = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V32F16.u = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0f16(<vscale x 32 x half*> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V16F16.u = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0f16(<vscale x 16 x half*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8F16.u = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4F16.u = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2F16.u = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0f16(<vscale x 2 x half*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F16.u = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0f16(<vscale x 1 x half*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8I64.u = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4I64.u = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2I64.u = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1I64.u = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V16I32.u = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0i32(<vscale x 16 x i32*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8I32.u = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4I32.u = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2I32.u = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1I32.u = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0i32(<vscale x 1 x i32*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V32I16.u = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0i16(<vscale x 32 x i16*> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V16I16.u = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0i16(<vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8I16.u = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4I16.u = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2I16.u = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1I16.u = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8*> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8*> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8*> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8*> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8F64.u = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4F64.u = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2F64.u = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F64.u = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V16F32.u = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8F32.u = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4F32.u = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2F32.u = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F32.u = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V32F16.u = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V16F16.u = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8F16.u = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4F16.u = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2F16.u = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F16.u = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8I64.u = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4I64.u = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2I64.u = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1I64.u = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V16I32.u = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8I32.u = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4I32.u = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2I32.u = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1I32.u = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V32I16.u = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V16I16.u = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8I16.u = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4I16.u = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2I16.u = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1I16.u = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %V8F64.u = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
- %V4F64.u = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
- %V2F64.u = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
- %V1F64.u = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+ %V8F64.u = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+ %V4F64.u = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+ %V2F64.u = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+ %V1F64.u = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
- %V16F32.u = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0f32(<vscale x 16 x float*> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
- %V8F32.u = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
- %V4F32.u = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
- %V2F32.u = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
- %V1F32.u = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+ %V16F32.u = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+ %V8F32.u = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+ %V4F32.u = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+ %V2F32.u = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+ %V1F32.u = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
- %V32F16.u = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0f16(<vscale x 32 x half*> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
- %V16F16.u = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0f16(<vscale x 16 x half*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
- %V8F16.u = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
- %V4F16.u = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
- %V2F16.u = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0f16(<vscale x 2 x half*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
- %V1F16.u = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0f16(<vscale x 1 x half*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
+ %V32F16.u = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+ %V16F16.u = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+ %V8F16.u = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+ %V4F16.u = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+ %V2F16.u = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+ %V1F16.u = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
- %V8I64.u = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
- %V4I64.u = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
- %V2I64.u = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
- %V1I64.u = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+ %V8I64.u = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+ %V4I64.u = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+ %V2I64.u = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+ %V1I64.u = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
- %V16I32.u = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0i32(<vscale x 16 x i32*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
- %V8I32.u = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
- %V4I32.u = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
- %V2I32.u = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
- %V1I32.u = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0i32(<vscale x 1 x i32*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+ %V16I32.u = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+ %V8I32.u = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+ %V4I32.u = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+ %V2I32.u = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+ %V1I32.u = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
- %V32I16.u = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0i16(<vscale x 32 x i16*> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
- %V16I16.u = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0i16(<vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
- %V8I16.u = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
- %V4I16.u = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
- %V2I16.u = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
- %V1I16.u = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+ %V32I16.u = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+ %V16I16.u = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+ %V8I16.u = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+ %V4I16.u = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+ %V2I16.u = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+ %V1I16.u = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
- %V8PTR = call <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8*> undef)
- %V4PTR = call <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8*> undef)
- %V2PTR = call <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8*> undef)
- %V1PTR= call <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8*> undef)
+ %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
+ %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
+ %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
+ %V1PTR= call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
ret void
}
-declare <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*>, i32, <vscale x 8 x i1>, <vscale x 8 x double>)
-declare <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*>, i32, <vscale x 4 x i1>, <vscale x 4 x double>)
-declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
-declare <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*>, i32, <vscale x 1 x i1>, <vscale x 1 x double>)
+declare <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x double>)
+declare <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x double>)
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x double>)
-declare <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0f32(<vscale x 16 x float*>, i32, <vscale x 16 x i1>, <vscale x 16 x float>)
-declare <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*>, i32, <vscale x 8 x i1>, <vscale x 8 x float>)
-declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
-declare <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*>, i32, <vscale x 1 x i1>, <vscale x 1 x float>)
+declare <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x float>)
+declare <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x float>)
+declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
+declare <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x float>)
-declare <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0f16(<vscale x 32 x half*>, i32, <vscale x 32 x i1>, <vscale x 32 x half>)
-declare <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0f16(<vscale x 16 x half*>, i32, <vscale x 16 x i1>, <vscale x 16 x half>)
-declare <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*>, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
-declare <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*>, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
-declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
-declare <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0f16(<vscale x 1 x half*>, i32, <vscale x 1 x i1>, <vscale x 1 x half>)
+declare <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr>, i32, <vscale x 32 x i1>, <vscale x 32 x half>)
+declare <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x half>)
+declare <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
+declare <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x half>)
-declare <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*>, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)
-declare <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*>, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
-declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
+declare <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)
+declare <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
+declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
-declare <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0i32(<vscale x 16 x i32*>, i32, <vscale x 16 x i1>, <vscale x 16 x i32>)
-declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*>, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)
-declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
-declare <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0i32(<vscale x 1 x i32*>, i32, <vscale x 1 x i1>, <vscale x 1 x i32>)
+declare <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x i32>)
+declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)
+declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
+declare <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i32>)
-declare <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0i16(<vscale x 32 x i16*>, i32, <vscale x 32 x i1>, <vscale x 32 x i16>)
-declare <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0i16(<vscale x 16 x i16*>, i32, <vscale x 16 x i1>, <vscale x 16 x i16>)
-declare <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*>, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*>, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
-declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
-declare <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*>, i32, <vscale x 1 x i1>, <vscale x 1 x i16>)
+declare <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr>, i32, <vscale x 32 x i1>, <vscale x 32 x i16>)
+declare <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x i16>)
+declare <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
+declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
+declare <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i16>)
-declare <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0i8(<vscale x 64 x i8*>, i32, <vscale x 64 x i1>, <vscale x 64 x i8>)
-declare <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*>, i32, <vscale x 32 x i1>, <vscale x 32 x i8>)
-declare <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0i8(<vscale x 16 x i8*>, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*>, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)
-declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
-declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
-declare <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*>, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)
+declare <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr>, i32, <vscale x 64 x i1>, <vscale x 64 x i8>)
+declare <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr>, i32, <vscale x 32 x i1>, <vscale x 32 x i8>)
+declare <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)
+declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
+declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
+declare <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)
-declare <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**>, i32, <vscale x 8 x i1>, <vscale x 8 x i8*>)
-declare <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**>, i32, <vscale x 4 x i1>, <vscale x 4 x i8*>)
-declare <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**>, i32, <vscale x 2 x i1>, <vscale x 2 x i8*>)
-declare <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**>, i32, <vscale x 1 x i1>, <vscale x 1 x i8*>)
+declare <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x ptr>)
+declare <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x ptr>)
+declare <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x ptr>)
+declare <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x ptr>)
define void @masked_scatter_aligned() {
; GENERIC-LABEL: 'masked_scatter_aligned'
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 8, <vscale x 4 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 8, <vscale x 2 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> undef, <vscale x 1 x double*> undef, i32 8, <vscale x 1 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float> undef, <vscale x 16 x float*> undef, i32 4, <vscale x 16 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 4, <vscale x 8 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 4, <vscale x 4 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 4, <vscale x 2 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> undef, <vscale x 1 x float*> undef, i32 4, <vscale x 1 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half> undef, <vscale x 32 x half*> undef, i32 2, <vscale x 32 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half> undef, <vscale x 16 x half*> undef, i32 2, <vscale x 16 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> undef, <vscale x 8 x half*> undef, i32 2, <vscale x 8 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> undef, <vscale x 4 x half*> undef, i32 2, <vscale x 4 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> undef, <vscale x 2 x half*> undef, i32 2, <vscale x 2 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> undef, <vscale x 1 x half*> undef, i32 2, <vscale x 1 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> undef, <vscale x 8 x i64*> undef, i32 8, <vscale x 8 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> undef, <vscale x 4 x i64*> undef, i32 8, <vscale x 4 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> undef, <vscale x 2 x i64*> undef, i32 8, <vscale x 2 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 8, <vscale x 1 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32> undef, <vscale x 16 x i32*> undef, i32 4, <vscale x 16 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 4, <vscale x 8 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 4, <vscale x 4 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> undef, <vscale x 2 x i32*> undef, i32 4, <vscale x 2 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> undef, <vscale x 1 x i32*> undef, i32 4, <vscale x 1 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16> undef, <vscale x 32 x i16*> undef, i32 2, <vscale x 32 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 2, <vscale x 16 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 2, <vscale x 8 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 2, <vscale x 4 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 2, <vscale x 2 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 2, <vscale x 1 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0i8(<vscale x 64 x i8> undef, <vscale x 64 x i8*> undef, i32 1, <vscale x 64 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0i8(<vscale x 32 x i8> undef, <vscale x 32 x i8*> undef, i32 1, <vscale x 32 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0i8(<vscale x 16 x i8> undef, <vscale x 16 x i8*> undef, i32 1, <vscale x 16 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> undef, <vscale x 8 x i8*> undef, i32 1, <vscale x 8 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> undef, <vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> undef, <vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> undef, <vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef)
-; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; MAX256-LABEL: 'masked_scatter_aligned'
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 8, <vscale x 4 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 8, <vscale x 2 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> undef, <vscale x 1 x double*> undef, i32 8, <vscale x 1 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float> undef, <vscale x 16 x float*> undef, i32 4, <vscale x 16 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 4, <vscale x 8 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 4, <vscale x 4 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 4, <vscale x 2 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> undef, <vscale x 1 x float*> undef, i32 4, <vscale x 1 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half> undef, <vscale x 32 x half*> undef, i32 2, <vscale x 32 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half> undef, <vscale x 16 x half*> undef, i32 2, <vscale x 16 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> undef, <vscale x 8 x half*> undef, i32 2, <vscale x 8 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> undef, <vscale x 4 x half*> undef, i32 2, <vscale x 4 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> undef, <vscale x 2 x half*> undef, i32 2, <vscale x 2 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> undef, <vscale x 1 x half*> undef, i32 2, <vscale x 1 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> undef, <vscale x 8 x i64*> undef, i32 8, <vscale x 8 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> undef, <vscale x 4 x i64*> undef, i32 8, <vscale x 4 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> undef, <vscale x 2 x i64*> undef, i32 8, <vscale x 2 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 8, <vscale x 1 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32> undef, <vscale x 16 x i32*> undef, i32 4, <vscale x 16 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 4, <vscale x 8 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 4, <vscale x 4 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> undef, <vscale x 2 x i32*> undef, i32 4, <vscale x 2 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> undef, <vscale x 1 x i32*> undef, i32 4, <vscale x 1 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16> undef, <vscale x 32 x i16*> undef, i32 2, <vscale x 32 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 2, <vscale x 16 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 2, <vscale x 8 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 2, <vscale x 4 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 2, <vscale x 2 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 2, <vscale x 1 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0i8(<vscale x 64 x i8> undef, <vscale x 64 x i8*> undef, i32 1, <vscale x 64 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0i8(<vscale x 32 x i8> undef, <vscale x 32 x i8*> undef, i32 1, <vscale x 32 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0i8(<vscale x 16 x i8> undef, <vscale x 16 x i8*> undef, i32 1, <vscale x 16 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> undef, <vscale x 8 x i8*> undef, i32 1, <vscale x 8 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> undef, <vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> undef, <vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> undef, <vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef)
-; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; UNSUPPORTED-LABEL: 'masked_scatter_aligned'
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 8, <vscale x 4 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 8, <vscale x 2 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> undef, <vscale x 1 x double*> undef, i32 8, <vscale x 1 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float> undef, <vscale x 16 x float*> undef, i32 4, <vscale x 16 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 4, <vscale x 8 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 4, <vscale x 4 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 4, <vscale x 2 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> undef, <vscale x 1 x float*> undef, i32 4, <vscale x 1 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half> undef, <vscale x 32 x half*> undef, i32 2, <vscale x 32 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half> undef, <vscale x 16 x half*> undef, i32 2, <vscale x 16 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> undef, <vscale x 8 x half*> undef, i32 2, <vscale x 8 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> undef, <vscale x 4 x half*> undef, i32 2, <vscale x 4 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> undef, <vscale x 2 x half*> undef, i32 2, <vscale x 2 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> undef, <vscale x 1 x half*> undef, i32 2, <vscale x 1 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> undef, <vscale x 8 x i64*> undef, i32 8, <vscale x 8 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> undef, <vscale x 4 x i64*> undef, i32 8, <vscale x 4 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> undef, <vscale x 2 x i64*> undef, i32 8, <vscale x 2 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 8, <vscale x 1 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32> undef, <vscale x 16 x i32*> undef, i32 4, <vscale x 16 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 4, <vscale x 8 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 4, <vscale x 4 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> undef, <vscale x 2 x i32*> undef, i32 4, <vscale x 2 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> undef, <vscale x 1 x i32*> undef, i32 4, <vscale x 1 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16> undef, <vscale x 32 x i16*> undef, i32 2, <vscale x 32 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 2, <vscale x 16 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 2, <vscale x 8 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 2, <vscale x 4 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 2, <vscale x 2 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 2, <vscale x 1 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0i8(<vscale x 64 x i8> undef, <vscale x 64 x i8*> undef, i32 1, <vscale x 64 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0i8(<vscale x 32 x i8> undef, <vscale x 32 x i8*> undef, i32 1, <vscale x 32 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0i8(<vscale x 16 x i8> undef, <vscale x 16 x i8*> undef, i32 1, <vscale x 16 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> undef, <vscale x 8 x i8*> undef, i32 1, <vscale x 8 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> undef, <vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> undef, <vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> undef, <vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef)
-; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 8, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 8, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> undef, <vscale x 1 x double*> undef, i32 8, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
- call void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float> undef, <vscale x 16 x float*> undef, i32 4, <vscale x 16 x i1> undef)
- call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 4, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 4, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 4, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> undef, <vscale x 1 x float*> undef, i32 4, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
+ call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
- call void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half> undef, <vscale x 32 x half*> undef, i32 2, <vscale x 32 x i1> undef)
- call void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half> undef, <vscale x 16 x half*> undef, i32 2, <vscale x 16 x i1> undef)
- call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> undef, <vscale x 8 x half*> undef, i32 2, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> undef, <vscale x 4 x half*> undef, i32 2, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> undef, <vscale x 2 x half*> undef, i32 2, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> undef, <vscale x 1 x half*> undef, i32 2, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+ call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+ call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
- call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> undef, <vscale x 8 x i64*> undef, i32 8, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> undef, <vscale x 4 x i64*> undef, i32 8, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> undef, <vscale x 2 x i64*> undef, i32 8, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 8, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
- call void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32> undef, <vscale x 16 x i32*> undef, i32 4, <vscale x 16 x i1> undef)
- call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 4, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 4, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> undef, <vscale x 2 x i32*> undef, i32 4, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> undef, <vscale x 1 x i32*> undef, i32 4, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
+ call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
- call void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16> undef, <vscale x 32 x i16*> undef, i32 2, <vscale x 32 x i1> undef)
- call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 2, <vscale x 16 x i1> undef)
- call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 2, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 2, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 2, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 2, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+ call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+ call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
- call void @llvm.masked.scatter.nxv64i8.nxv64p0i8(<vscale x 64 x i8> undef, <vscale x 64 x i8*> undef, i32 1, <vscale x 64 x i1> undef)
- call void @llvm.masked.scatter.nxv32i8.nxv32p0i8(<vscale x 32 x i8> undef, <vscale x 32 x i8*> undef, i32 1, <vscale x 32 x i1> undef)
- call void @llvm.masked.scatter.nxv16i8.nxv16p0i8(<vscale x 16 x i8> undef, <vscale x 16 x i8*> undef, i32 1, <vscale x 16 x i1> undef)
- call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> undef, <vscale x 8 x i8*> undef, i32 1, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> undef, <vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> undef, <vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> undef, <vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef)
+ call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
+ call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+ call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
- call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
ret void
}
; masked_scatter_unaligned: issues llvm.masked.scatter calls on scalable vectors
; with a small alignment operand (i32 2 for double and float elements, i32 1 for
; half, i64, i32, i16 and ptr elements). The expectations below require the cost
; model to report an invalid cost for every one of these calls.
; NOTE(review): presumably each alignment is below the natural alignment of the
; element type, as the function name suggests - confirm against the target.
; NOTE(review): the '-'/'+' line pairs differ only in pointer spelling and in the
; intrinsic name suffix (typed pointers vs. opaque ptr).
define void @masked_scatter_unaligned() {
; CHECK-LABEL: 'masked_scatter_unaligned'
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 2, <vscale x 8 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 2, <vscale x 4 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 2, <vscale x 2 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> undef, <vscale x 1 x double*> undef, i32 2, <vscale x 1 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float> undef, <vscale x 16 x float*> undef, i32 2, <vscale x 16 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 2, <vscale x 8 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 2, <vscale x 4 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 2, <vscale x 2 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> undef, <vscale x 1 x float*> undef, i32 2, <vscale x 1 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half> undef, <vscale x 32 x half*> undef, i32 1, <vscale x 32 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half> undef, <vscale x 16 x half*> undef, i32 1, <vscale x 16 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> undef, <vscale x 8 x half*> undef, i32 1, <vscale x 8 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> undef, <vscale x 4 x half*> undef, i32 1, <vscale x 4 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> undef, <vscale x 2 x half*> undef, i32 1, <vscale x 2 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> undef, <vscale x 1 x half*> undef, i32 1, <vscale x 1 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> undef, <vscale x 8 x i64*> undef, i32 1, <vscale x 8 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> undef, <vscale x 4 x i64*> undef, i32 1, <vscale x 4 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> undef, <vscale x 2 x i64*> undef, i32 1, <vscale x 2 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 1, <vscale x 1 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32> undef, <vscale x 16 x i32*> undef, i32 1, <vscale x 16 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 1, <vscale x 8 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 1, <vscale x 4 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> undef, <vscale x 2 x i32*> undef, i32 1, <vscale x 2 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> undef, <vscale x 1 x i32*> undef, i32 1, <vscale x 1 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16> undef, <vscale x 32 x i16*> undef, i32 1, <vscale x 32 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 1, <vscale x 2 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 1, <vscale x 1 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 1, <vscale x 8 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 1, <vscale x 4 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 1, <vscale x 2 x i1> undef)
-; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 1, <vscale x 1 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 2, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 2, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 2, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> undef, <vscale x 1 x double*> undef, i32 2, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
- call void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float> undef, <vscale x 16 x float*> undef, i32 2, <vscale x 16 x i1> undef)
- call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 2, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 2, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 2, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> undef, <vscale x 1 x float*> undef, i32 2, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+ call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
- call void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half> undef, <vscale x 32 x half*> undef, i32 1, <vscale x 32 x i1> undef)
- call void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half> undef, <vscale x 16 x half*> undef, i32 1, <vscale x 16 x i1> undef)
- call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> undef, <vscale x 8 x half*> undef, i32 1, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> undef, <vscale x 4 x half*> undef, i32 1, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> undef, <vscale x 2 x half*> undef, i32 1, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> undef, <vscale x 1 x half*> undef, i32 1, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
+ call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+ call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
- call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> undef, <vscale x 8 x i64*> undef, i32 1, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> undef, <vscale x 4 x i64*> undef, i32 1, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> undef, <vscale x 2 x i64*> undef, i32 1, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 1, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
- call void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32> undef, <vscale x 16 x i32*> undef, i32 1, <vscale x 16 x i1> undef)
- call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 1, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 1, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> undef, <vscale x 2 x i32*> undef, i32 1, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> undef, <vscale x 1 x i32*> undef, i32 1, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+ call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
- call void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16> undef, <vscale x 32 x i16*> undef, i32 1, <vscale x 32 x i1> undef)
- call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef)
- call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 1, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 1, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
+ call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+ call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
- call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 1, <vscale x 8 x i1> undef)
- call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 1, <vscale x 4 x i1> undef)
- call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 1, <vscale x 2 x i1> undef)
- call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 1, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
ret void
}
; Declarations of the llvm.masked.scatter intrinsic overloads used by this test,
; grouped by element type (double, float, half, i64, i32, i16, i8, ptr). Every
; overload takes the value vector, a vector of pointers, an i32 operand and an
; i1 mask vector with the same scalable element count.
; NOTE(review): the i32 operand is presumably the alignment, matching the
; LangRef description of llvm.masked.scatter - confirm.
-declare void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double>, <vscale x 8 x double*>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double>, <vscale x 4 x double*>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double>, <vscale x 1 x double*>, i32, <vscale x 1 x i1>)
+declare void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-declare void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float>, <vscale x 16 x float*>, i32, <vscale x 16 x i1>)
-declare void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float>, <vscale x 8 x float*>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float>, <vscale x 4 x float*>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float>, <vscale x 1 x float*>, i32, <vscale x 1 x i1>)
+declare void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)
+declare void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-declare void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half>, <vscale x 32 x half*>, i32, <vscale x 32 x i1>)
-declare void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half>, <vscale x 16 x half*>, i32, <vscale x 16 x i1>)
-declare void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half>, <vscale x 8 x half*>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half>, <vscale x 4 x half*>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half>, <vscale x 1 x half*>, i32, <vscale x 1 x i1>)
+declare void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half>, <vscale x 32 x ptr>, i32, <vscale x 32 x i1>)
+declare void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)
+declare void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-declare void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64>, <vscale x 8 x i64*>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64>, <vscale x 4 x i64*>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64>, <vscale x 1 x i64*>, i32, <vscale x 1 x i1>)
+declare void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-declare void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32>, <vscale x 16 x i32*>, i32, <vscale x 16 x i1>)
-declare void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32>, <vscale x 1 x i32*>, i32, <vscale x 1 x i1>)
+declare void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)
+declare void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-declare void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16>, <vscale x 32 x i16*>, i32, <vscale x 32 x i1>)
-declare void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16>, <vscale x 16 x i16*>, i32, <vscale x 16 x i1>)
-declare void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16>, <vscale x 8 x i16*>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16>, <vscale x 1 x i16*>, i32, <vscale x 1 x i1>)
+declare void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16>, <vscale x 32 x ptr>, i32, <vscale x 32 x i1>)
+declare void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)
+declare void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-declare void @llvm.masked.scatter.nxv64i8.nxv64p0i8(<vscale x 64 x i8>, <vscale x 64 x i8*>, i32, <vscale x 64 x i1>)
-declare void @llvm.masked.scatter.nxv32i8.nxv32p0i8(<vscale x 32 x i8>, <vscale x 32 x i8*>, i32, <vscale x 32 x i1>)
-declare void @llvm.masked.scatter.nxv16i8.nxv16p0i8(<vscale x 16 x i8>, <vscale x 16 x i8*>, i32, <vscale x 16 x i1>)
-declare void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8>, <vscale x 8 x i8*>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, i32, <vscale x 1 x i1>)
+declare void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8>, <vscale x 64 x ptr>, i32, <vscale x 64 x i1>)
+declare void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8>, <vscale x 32 x ptr>, i32, <vscale x 32 x i1>)
+declare void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)
+declare void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
-declare void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*>, <vscale x 8 x i8**>, i32, <vscale x 8 x i1>)
-declare void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*>, <vscale x 4 x i8**>, i32, <vscale x 4 x i1>)
-declare void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8**>, i32, <vscale x 2 x i1>)
-declare void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*>, <vscale x 1 x i8**>, i32, <vscale x 1 x i1>)
+declare void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
;
; Test costs for i8 and i16 comparisons against memory with a small immediate.
; fun0: loads an i8 from %Src, compares it for equality with the immediate 123
; and returns the i1 result zero-extended to i32. %Dst and %Val are not used in
; the body. The expectations give the load a cost of 0 and the compare a cost
; of 1.
-define i32 @fun0(i8* %Src, i8* %Dst, i8 %Val) {
+define i32 @fun0(ptr %Src, ptr %Dst, i8 %Val) {
; CHECK: function 'fun0'
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld = load i8, i8* %Src
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld = load i8, ptr %Src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Cmp = icmp eq i8 %Ld, 123
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %Ret = zext i1 %Cmp to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %Ret
- %Ld = load i8, i8* %Src
+ %Ld = load i8, ptr %Src
%Cmp = icmp eq i8 %Ld, 123
%Ret = zext i1 %Cmp to i32
ret i32 %Ret
}
-define i32 @fun1(i16* %Src, i16* %Dst, i16 %Val) {
+define i32 @fun1(ptr %Src, ptr %Dst, i16 %Val) {
; CHECK: function 'fun1'
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld = load i16, i16* %Src
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld = load i16, ptr %Src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Cmp = icmp eq i16
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %Ret = zext i1 %Cmp to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %Ret
- %Ld = load i16, i16* %Src
+ %Ld = load i16, ptr %Src
%Cmp = icmp eq i16 %Ld, 1234
%Ret = zext i1 %Cmp to i32
ret i32 %Ret
; where the load performs the extension.
define void @sext() {
- %li8 = load i8, i8* undef
+ %li8 = load i8, ptr undef
sext i8 %li8 to i16
sext i8 %li8 to i32
sext i8 %li8 to i64
- %li16 = load i16, i16* undef
+ %li16 = load i16, ptr undef
sext i16 %li16 to i32
sext i16 %li16 to i64
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
sext i32 %li32 to i64
ret void
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li8 = load i8, i8* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li8 = load i8, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %1 = sext i8 %li8 to i16
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %2 = sext i8 %li8 to i32
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %3 = sext i8 %li8 to i64
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %4 = sext i16 %li16 to i32
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %5 = sext i16 %li16 to i64
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %6 = sext i32 %li32 to i64
}
define void @zext() {
- %li8 = load i8, i8* undef
+ %li8 = load i8, ptr undef
zext i8 %li8 to i16
zext i8 %li8 to i32
zext i8 %li8 to i64
- %li16 = load i16, i16* undef
+ %li16 = load i16, ptr undef
zext i16 %li16 to i32
zext i16 %li16 to i64
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
zext i32 %li32 to i64
ret void
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li8 = load i8, i8* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li8 = load i8, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %1 = zext i8 %li8 to i16
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %2 = zext i8 %li8 to i32
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %3 = zext i8 %li8 to i64
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %4 = zext i16 %li16 to i32
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %5 = zext i16 %li16 to i64
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %6 = zext i32 %li32 to i64
}
ret void;
}
-define void @sitofp_extload(i16 *%src16, i8 *%src8) {
- %ld16 = load i16, i16 *%src16
+define void @sitofp_extload(ptr %src16, ptr %src8) {
+ %ld16 = load i16, ptr %src16
%v6 = sitofp i16 %ld16 to fp128
%v7 = sitofp i16 %ld16 to double
%v8 = sitofp i16 %ld16 to float
- %ld8 = load i8, i8 *%src8
+ %ld8 = load i8, ptr %src8
%v9 = sitofp i8 %ld8 to fp128
%v10 = sitofp i8 %ld8 to double
%v11 = sitofp i8 %ld8 to float
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %ld16 = load i16, i16* %src16
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %ld16 = load i16, ptr %src16
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v6 = sitofp i16 %ld16 to fp128
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v7 = sitofp i16 %ld16 to double
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v8 = sitofp i16 %ld16 to float
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %ld8 = load i8, i8* %src8
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %ld8 = load i8, ptr %src8
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v9 = sitofp i8 %ld8 to fp128
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v10 = sitofp i8 %ld8 to double
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v11 = sitofp i8 %ld8 to float
ret void;
}
-define void @uitofp_extload(i16 *%src16, i8 *%src8) {
- %ld16 = load i16, i16 *%src16
+define void @uitofp_extload(ptr %src16, ptr %src8) {
+ %ld16 = load i16, ptr %src16
%v6 = uitofp i16 %ld16 to fp128
%v7 = uitofp i16 %ld16 to double
%v8 = uitofp i16 %ld16 to float
- %ld8 = load i8, i8 *%src8
+ %ld8 = load i8, ptr %src8
%v9 = uitofp i8 %ld8 to fp128
%v10 = uitofp i8 %ld8 to double
%v11 = uitofp i8 %ld8 to float
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %ld16 = load i16, i16* %src16
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %ld16 = load i16, ptr %src16
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v6 = uitofp i16 %ld16 to fp128
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v7 = uitofp i16 %ld16 to double
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v8 = uitofp i16 %ld16 to float
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %ld8 = load i8, i8* %src8
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %ld8 = load i8, ptr %src8
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v9 = uitofp i8 %ld8 to fp128
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v10 = uitofp i8 %ld8 to double
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %v11 = uitofp i8 %ld8 to float
; Test that cost functions can handle immediates of more than 64 bits without crashing.
; Cost of a load which is checked for folding into a compare w/ memory.
-define i32 @fun0(i72* %Src) {
- %L = load i72, i72* %Src
+define i32 @fun0(ptr %Src) {
+ %L = load i72, ptr %Src
%B = icmp ult i72 %L, 166153499473114484112
%Res = zext i1 %B to i32
ret i32 %Res
}
; Cost of a compare which is checked for elimination by Load and Test.
-define i32 @fun1(i72* %Src, i72* %Dst) {
- %L = load i72, i72* %Src
- store i72 %L, i72* %Dst
+define i32 @fun1(ptr %Src, ptr %Dst) {
+ %L = load i72, ptr %Src
+ store i72 %L, ptr %Dst
%B = icmp ult i72 %L, 166153499473114484112
%Res = zext i1 %B to i32
ret i32 %Res
; cases where this is needed.
define void @icmp() {
- %li8_0 = load i8, i8* undef
- %li8_1 = load i8, i8* undef
+ %li8_0 = load i8, ptr undef
+ %li8_1 = load i8, ptr undef
icmp slt i8 %li8_0, %li8_1
%a0 = add i8 %li8_0, 1
icmp slt i8 %a0, 123
- %li16_0 = load i16, i16* undef
- %li16_1 = load i16, i16* undef
+ %li16_0 = load i16, ptr undef
+ %li16_1 = load i16, ptr undef
icmp slt i16 %li16_0, %li16_1
%a2 = add i16 %li16_0, 1
ret void;
; CHECK: function 'icmp'
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li8_0 = load i8, i8* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li8_1 = load i8, i8* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li8_0 = load i8, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li8_1 = load i8, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = icmp slt i8 %li8_0, %li8_1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %a0 = add i8 %li8_0, 1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %a1 = add i8 %li8_1, 1
; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %2 = icmp slt i8 %a0, %a1
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %3 = icmp slt i8 %a0, 123
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_0 = load i16, i16* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_0 = load i16, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = icmp slt i16 %li16_0, %li16_1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %a2 = add i16 %li16_0, 1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %a3 = add i16 %li16_1, 1
}
; Test that store/load reversed is reflected in costs.
-define void @bswap_i64_mem(i64* %src, i64 %arg, i64* %dst) {
+define void @bswap_i64_mem(ptr %src, i64 %arg, ptr %dst) {
; CHECK: function 'bswap_i64_mem'
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i64, i64* %src
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i64, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i64 @llvm.bswap.i64(i64 %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i64 @llvm.bswap.i64(i64 %arg)
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i64 %swp2, i64* %dst
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i64, i64* %src
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i64 %swp2, ptr %dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i64, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i64 @llvm.bswap.i64(i64 %Ld2)
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i64 %swp3, i64* %dst
- %Ld1 = load i64, i64* %src
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i64 %swp3, ptr %dst
+ %Ld1 = load i64, ptr %src
%swp1 = tail call i64 @llvm.bswap.i64(i64 %Ld1)
%swp2 = tail call i64 @llvm.bswap.i64(i64 %arg)
- store i64 %swp2, i64* %dst
+ store i64 %swp2, ptr %dst
- %Ld2 = load i64, i64* %src
+ %Ld2 = load i64, ptr %src
%swp3 = tail call i64 @llvm.bswap.i64(i64 %Ld2)
- store i64 %swp3, i64* %dst
+ store i64 %swp3, ptr %dst
ret void
}
-define void @bswap_v2i64_mem(<2 x i64>* %src, <2 x i64> %arg, <2 x i64>* %dst) {
+define void @bswap_v2i64_mem(ptr %src, <2 x i64> %arg, ptr %dst) {
; CHECK:function 'bswap_v2i64_mem'
-; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <2 x i64>, <2 x i64>* %src
-; Z15: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <2 x i64>, <2 x i64>* %src
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <2 x i64>, ptr %src
+; Z15: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <2 x i64>, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg)
-; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp2, <2 x i64>* %dst
-; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp2, <2 x i64>* %dst
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <2 x i64>, <2 x i64>* %src
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp2, ptr %dst
+; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp2, ptr %dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <2 x i64>, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld2)
-; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp3, <2 x i64>* %dst
-; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp3, <2 x i64>* %dst
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp3, ptr %dst
+; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp3, ptr %dst
- %Ld1 = load <2 x i64>, <2 x i64>* %src
+ %Ld1 = load <2 x i64>, ptr %src
%swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1)
%swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg)
- store <2 x i64> %swp2, <2 x i64>* %dst
+ store <2 x i64> %swp2, ptr %dst
- %Ld2 = load <2 x i64>, <2 x i64>* %src
+ %Ld2 = load <2 x i64>, ptr %src
%swp3 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld2)
- store <2 x i64> %swp3, <2 x i64>* %dst
+ store <2 x i64> %swp3, ptr %dst
ret void
}
-define void @bswap_i32_mem(i32* %src, i32 %arg, i32* %dst) {
+define void @bswap_i32_mem(ptr %src, i32 %arg, ptr %dst) {
; CHECK: function 'bswap_i32_mem'
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i32, i32* %src
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i32, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i32 @llvm.bswap.i32(i32 %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i32 @llvm.bswap.i32(i32 %arg)
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i32 %swp2, i32* %dst
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i32, i32* %src
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i32 %swp2, ptr %dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i32, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i32 @llvm.bswap.i32(i32 %Ld2)
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i32 %swp3, i32* %dst
- %Ld1 = load i32, i32* %src
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i32 %swp3, ptr %dst
+ %Ld1 = load i32, ptr %src
%swp1 = tail call i32 @llvm.bswap.i32(i32 %Ld1)
%swp2 = tail call i32 @llvm.bswap.i32(i32 %arg)
- store i32 %swp2, i32* %dst
+ store i32 %swp2, ptr %dst
- %Ld2 = load i32, i32* %src
+ %Ld2 = load i32, ptr %src
%swp3 = tail call i32 @llvm.bswap.i32(i32 %Ld2)
- store i32 %swp3, i32* %dst
+ store i32 %swp3, ptr %dst
ret void
}
-define void @bswap_v4i32_mem(<4 x i32>* %src, <4 x i32> %arg, <4 x i32>* %dst) {
+define void @bswap_v4i32_mem(ptr %src, <4 x i32> %arg, ptr %dst) {
; CHECK: function 'bswap_v4i32_mem'
-; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <4 x i32>, <4 x i32>* %src
-; Z15: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <4 x i32>, <4 x i32>* %src
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <4 x i32>, ptr %src
+; Z15: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <4 x i32>, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg)
-; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp2, <4 x i32>* %dst
-; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp2, <4 x i32>* %dst
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <4 x i32>, <4 x i32>* %src
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp2, ptr %dst
+; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp2, ptr %dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <4 x i32>, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld2)
-; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp3, <4 x i32>* %dst
-; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp3, <4 x i32>* %dst
-%Ld1 = load <4 x i32>, <4 x i32>* %src
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp3, ptr %dst
+; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp3, ptr %dst
+%Ld1 = load <4 x i32>, ptr %src
%swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1)
%swp2 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg)
- store <4 x i32> %swp2, <4 x i32>* %dst
+ store <4 x i32> %swp2, ptr %dst
- %Ld2 = load <4 x i32>, <4 x i32>* %src
+ %Ld2 = load <4 x i32>, ptr %src
%swp3 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld2)
- store <4 x i32> %swp3, <4 x i32>* %dst
+ store <4 x i32> %swp3, ptr %dst
ret void
}
-define void @bswap_i16_mem(i16* %src, i16 %arg, i16* %dst) {
+define void @bswap_i16_mem(ptr %src, i16 %arg, ptr %dst) {
; CHECK: function 'bswap_i16_mem'
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i16, i16* %src
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i16, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i16 @llvm.bswap.i16(i16 %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i16 @llvm.bswap.i16(i16 %arg)
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i16 %swp2, i16* %dst
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i16, i16* %src
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i16 %swp2, ptr %dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i16, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i16 @llvm.bswap.i16(i16 %Ld2)
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i16 %swp3, i16* %dst
- %Ld1 = load i16, i16* %src
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i16 %swp3, ptr %dst
+ %Ld1 = load i16, ptr %src
%swp1 = tail call i16 @llvm.bswap.i16(i16 %Ld1)
%swp2 = tail call i16 @llvm.bswap.i16(i16 %arg)
- store i16 %swp2, i16* %dst
+ store i16 %swp2, ptr %dst
- %Ld2 = load i16, i16* %src
+ %Ld2 = load i16, ptr %src
%swp3 = tail call i16 @llvm.bswap.i16(i16 %Ld2)
- store i16 %swp3, i16* %dst
+ store i16 %swp3, ptr %dst
ret void
}
-define void @bswap_v8i16_mem(<8 x i16>* %src, <8 x i16> %arg, <8 x i16>* %dst) {
+define void @bswap_v8i16_mem(ptr %src, <8 x i16> %arg, ptr %dst) {
; CHECK: function 'bswap_v8i16_mem'
-; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <8 x i16>, <8 x i16>* %src
-; Z15: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <8 x i16>, <8 x i16>* %src
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <8 x i16>, ptr %src
+; Z15: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <8 x i16>, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg)
-; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp2, <8 x i16>* %dst
-; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp2, <8 x i16>* %dst
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <8 x i16>, <8 x i16>* %src
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp2, ptr %dst
+; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp2, ptr %dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <8 x i16>, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld2)
-; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp3, <8 x i16>* %dst
-; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp3, <8 x i16>* %dst
-%Ld1 = load <8 x i16>, <8 x i16>* %src
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp3, ptr %dst
+; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp3, ptr %dst
+%Ld1 = load <8 x i16>, ptr %src
%swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1)
%swp2 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg)
- store <8 x i16> %swp2, <8 x i16>* %dst
+ store <8 x i16> %swp2, ptr %dst
- %Ld2 = load <8 x i16>, <8 x i16>* %src
+ %Ld2 = load <8 x i16>, ptr %src
%swp3 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld2)
- store <8 x i16> %swp3, <8 x i16>* %dst
+ store <8 x i16> %swp3, ptr %dst
ret void
}
;
; Test that load and test results in 0 cost for the compare.
-define i64 @fun0(i64* %Src, i64 %Arg) {
- %Ld1 = load i64, i64* %Src
+define i64 @fun0(ptr %Src, i64 %Arg) {
+ %Ld1 = load i64, ptr %Src
%Cmp = icmp eq i64 %Ld1, 0
%S = select i1 %Cmp, i64 %Arg, i64 %Ld1
ret i64 %S
; CHECK: function 'fun0'
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load i64, i64* %Src
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load i64, ptr %Src
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Cmp = icmp eq i64 %Ld1, 0
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %S = select
}
-define i32 @fun1(i32* %Src, i32 %Arg) {
- %Ld1 = load i32, i32* %Src
+define i32 @fun1(ptr %Src, i32 %Arg) {
+ %Ld1 = load i32, ptr %Src
%Cmp = icmp eq i32 %Ld1, 0
%S = select i1 %Cmp, i32 %Arg, i32 %Ld1
ret i32 %S
; CHECK: function 'fun1'
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load i32, i32* %Src
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load i32, ptr %Src
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Cmp = icmp eq i32 %Ld1, 0
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %S = select
}
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
; Test that the cost heuristic for a folded load works also for a pointer operand.
-define void @fun0(i64* %lhs, i64** %rhs_ptr) {
- %rhs = load i64*, i64** %rhs_ptr
- %c = icmp eq i64* %lhs, %rhs
+define void @fun0(ptr %lhs, ptr %rhs_ptr) {
+ %rhs = load ptr, ptr %rhs_ptr
+ %c = icmp eq ptr %lhs, %rhs
ret void
; CHECK: function 'fun0'
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %rhs = load
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=systemz-unknown < %s | FileCheck %s --check-prefix=CHECK
; Check that cost is 1 for unusual load to register sized load.
-define i32 @loadUnusualIntegerWithTrunc(i128* %ptr) {
+define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, i128* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
%trunc = trunc i128 %out to i32
ret i32 %trunc
}
-define i128 @loadUnusualInteger(i128* %ptr) {
+define i128 @loadUnusualInteger(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualInteger'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, i128* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
ret i128 %out
}
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
define void @store() {
- store i8 undef, i8* undef
- store i16 undef, i16* undef
- store i32 undef, i32* undef
- store i64 undef, i64* undef
- store float undef, float* undef
- store double undef, double* undef
- store fp128 undef, fp128* undef
- store <2 x i8> undef, <2 x i8>* undef
- store <2 x i16> undef, <2 x i16>* undef
- store <2 x i32> undef, <2 x i32>* undef
- store <2 x i64> undef, <2 x i64>* undef
- store <2 x float> undef, <2 x float>* undef
- store <2 x double> undef, <2 x double>* undef
- store <4 x i8> undef, <4 x i8>* undef
- store <4 x i16> undef, <4 x i16>* undef
- store <4 x i32> undef, <4 x i32>* undef
- store <4 x i64> undef, <4 x i64>* undef
- store <4 x float> undef, <4 x float>* undef
- store <4 x double> undef, <4 x double>* undef
- store <8 x i8> undef, <8 x i8>* undef
- store <8 x i16> undef, <8 x i16>* undef
- store <8 x i32> undef, <8 x i32>* undef
- store <8 x i64> undef, <8 x i64>* undef
- store <8 x float> undef, <8 x float>* undef
- store <8 x double> undef, <8 x double>* undef
- store <16 x i8> undef, <16 x i8>* undef
- store <16 x i16> undef, <16 x i16>* undef
- store <16 x i32> undef, <16 x i32>* undef
- store <16 x i64> undef, <16 x i64>* undef
- store <16 x float> undef, <16 x float>* undef
- store <16 x double> undef, <16 x double>* undef
+ store i8 undef, ptr undef
+ store i16 undef, ptr undef
+ store i32 undef, ptr undef
+ store i64 undef, ptr undef
+ store float undef, ptr undef
+ store double undef, ptr undef
+ store fp128 undef, ptr undef
+ store <2 x i8> undef, ptr undef
+ store <2 x i16> undef, ptr undef
+ store <2 x i32> undef, ptr undef
+ store <2 x i64> undef, ptr undef
+ store <2 x float> undef, ptr undef
+ store <2 x double> undef, ptr undef
+ store <4 x i8> undef, ptr undef
+ store <4 x i16> undef, ptr undef
+ store <4 x i32> undef, ptr undef
+ store <4 x i64> undef, ptr undef
+ store <4 x float> undef, ptr undef
+ store <4 x double> undef, ptr undef
+ store <8 x i8> undef, ptr undef
+ store <8 x i16> undef, ptr undef
+ store <8 x i32> undef, ptr undef
+ store <8 x i64> undef, ptr undef
+ store <8 x float> undef, ptr undef
+ store <8 x double> undef, ptr undef
+ store <16 x i8> undef, ptr undef
+ store <16 x i16> undef, ptr undef
+ store <16 x i32> undef, ptr undef
+ store <16 x i64> undef, ptr undef
+ store <16 x float> undef, ptr undef
+ store <16 x double> undef, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store fp128 undef, fp128* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, <8 x double>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef
-; CHECK: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i64> undef, <16 x i64>* undef
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef
-; CHECK: Cost Model: Found an estimated cost of 8 for instruction: store <16 x double> undef, <16 x double>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store fp128 undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i64> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction: store <16 x double> undef, ptr undef
ret void;
}
define void @load() {
- load i8, i8* undef
- load i16, i16* undef
- load i32, i32* undef
- load i64, i64* undef
- load float, float* undef
- load double, double* undef
- load fp128, fp128* undef
- load <2 x i8>, <2 x i8>* undef
- load <2 x i16>, <2 x i16>* undef
- load <2 x i32>, <2 x i32>* undef
- load <2 x i64>, <2 x i64>* undef
- load <2 x float>, <2 x float>* undef
- load <2 x double>, <2 x double>* undef
- load <4 x i8>, <4 x i8>* undef
- load <4 x i16>, <4 x i16>* undef
- load <4 x i32>, <4 x i32>* undef
- load <4 x i64>, <4 x i64>* undef
- load <4 x float>, <4 x float>* undef
- load <4 x double>, <4 x double>* undef
- load <8 x i8>, <8 x i8>* undef
- load <8 x i16>, <8 x i16>* undef
- load <8 x i32>, <8 x i32>* undef
- load <8 x i64>, <8 x i64>* undef
- load <8 x float>, <8 x float>* undef
- load <8 x double>, <8 x double>* undef
- load <16 x i8>, <16 x i8>* undef
- load <16 x i16>, <16 x i16>* undef
- load <16 x i32>, <16 x i32>* undef
- load <16 x i64>, <16 x i64>* undef
- load <16 x float>, <16 x float>* undef
- load <16 x double>, <16 x double>* undef
+ load i8, ptr undef
+ load i16, ptr undef
+ load i32, ptr undef
+ load i64, ptr undef
+ load float, ptr undef
+ load double, ptr undef
+ load fp128, ptr undef
+ load <2 x i8>, ptr undef
+ load <2 x i16>, ptr undef
+ load <2 x i32>, ptr undef
+ load <2 x i64>, ptr undef
+ load <2 x float>, ptr undef
+ load <2 x double>, ptr undef
+ load <4 x i8>, ptr undef
+ load <4 x i16>, ptr undef
+ load <4 x i32>, ptr undef
+ load <4 x i64>, ptr undef
+ load <4 x float>, ptr undef
+ load <4 x double>, ptr undef
+ load <8 x i8>, ptr undef
+ load <8 x i16>, ptr undef
+ load <8 x i32>, ptr undef
+ load <8 x i64>, ptr undef
+ load <8 x float>, ptr undef
+ load <8 x double>, ptr undef
+ load <16 x i8>, ptr undef
+ load <16 x i16>, ptr undef
+ load <16 x i32>, ptr undef
+ load <16 x i64>, ptr undef
+ load <16 x float>, ptr undef
+ load <16 x double>, ptr undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = load float, float* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = load double, double* undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %7 = load fp128, fp128* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, <2 x i8>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, <2 x i16>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, <2 x i32>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, <2 x i64>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x float>, <2 x float>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <2 x double>, <2 x double>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <4 x i8>, <4 x i8>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i16>, <4 x i16>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x i32>, <4 x i32>* undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <4 x i64>, <4 x i64>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <4 x double>, <4 x double>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <8 x i8>, <8 x i8>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, <8 x i16>* undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <8 x i32>, <8 x i32>* undef
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <8 x i64>, <8 x i64>* undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %24 = load <8 x float>, <8 x float>* undef
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %25 = load <8 x double>, <8 x double>* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <16 x i8>, <16 x i8>* undef
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <16 x i16>, <16 x i16>* undef
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %28 = load <16 x i32>, <16 x i32>* undef
-; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %29 = load <16 x i64>, <16 x i64>* undef
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %30 = load <16 x float>, <16 x float>* undef
-; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %31 = load <16 x double>, <16 x double>* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = load float, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = load double, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %7 = load fp128, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <2 x i8>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <2 x i16>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <2 x i32>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <2 x i64>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x float>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <2 x double>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <4 x i8>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i16>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <4 x i32>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <4 x i64>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <4 x double>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <8 x i8>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <8 x i32>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <8 x i64>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %24 = load <8 x float>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %25 = load <8 x double>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <16 x i8>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <16 x i16>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %28 = load <16 x i32>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %29 = load <16 x i64>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %30 = load <16 x float>, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %31 = load <16 x double>, ptr undef
ret void;
}
; Z15: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i32 %c4, -1
entry:
- %l0 = load i32, i32* undef
+ %l0 = load i32, ptr undef
%c0 = xor i32 %l0, -1
%res0 = or i32 %a, %c0
- store i32 %res0, i32* undef
+ store i32 %res0, ptr undef
- %l1 = load i32, i32* undef
+ %l1 = load i32, ptr undef
%c1 = xor i32 %l1, -1
%res1 = and i32 %a, %c1
- store i32 %res1, i32* undef
+ store i32 %res1, ptr undef
- %l2 = load i32, i32* undef
+ %l2 = load i32, ptr undef
%c2 = and i32 %l2, %a
%res2 = xor i32 %c2, -1
- store i32 %res2, i32* undef
+ store i32 %res2, ptr undef
- %l3 = load i32, i32* undef
+ %l3 = load i32, ptr undef
%c3 = or i32 %l3, %a
%res3 = xor i32 %c3, -1
- store i32 %res3, i32* undef
+ store i32 %res3, ptr undef
- %l4 = load i32, i32* undef
+ %l4 = load i32, ptr undef
%c4 = xor i32 %l4, %a
%res4 = xor i32 %c4, -1
- store i32 %res4, i32* undef
+ store i32 %res4, ptr undef
ret void
}
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i64 %c4, -1
; Z15: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i64 %c4, -1
entry:
- %l0 = load i64, i64* undef
+ %l0 = load i64, ptr undef
%c0 = xor i64 %l0, -1
%res0 = or i64 %a, %c0
- store i64 %res0, i64* undef
+ store i64 %res0, ptr undef
- %l1 = load i64, i64* undef
+ %l1 = load i64, ptr undef
%c1 = xor i64 %l1, -1
%res1 = and i64 %a, %c1
- store i64 %res1, i64* undef
+ store i64 %res1, ptr undef
- %l2 = load i64, i64* undef
+ %l2 = load i64, ptr undef
%c2 = and i64 %l2, %a
%res2 = xor i64 %c2, -1
- store i64 %res2, i64* undef
+ store i64 %res2, ptr undef
- %l3 = load i64, i64* undef
+ %l3 = load i64, ptr undef
%c3 = or i64 %l3, %a
%res3 = xor i64 %c3, -1
- store i64 %res3, i64* undef
+ store i64 %res3, ptr undef
- %l4 = load i64, i64* undef
+ %l4 = load i64, ptr undef
%c4 = xor i64 %l4, %a
%res4 = xor i64 %c4, -1
- store i64 %res4, i64* undef
+ store i64 %res4, ptr undef
ret void
}
; value.
define void @add() {
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
add i32 %li32, undef
- %li32_0 = load i32, i32* undef
- %li32_1 = load i32, i32* undef
+ %li32_0 = load i32, ptr undef
+ %li32_1 = load i32, ptr undef
add i32 %li32_0, %li32_1
- %li64 = load i64, i64* undef
+ %li64 = load i64, ptr undef
add i64 %li64, undef
- %li64_0 = load i64, i64* undef
- %li64_1 = load i64, i64* undef
+ %li64_0 = load i64, ptr undef
+ %li64_1 = load i64, ptr undef
add i64 %li64_0, %li64_1
; Truncated load
- %li64_2 = load i64, i64* undef
+ %li64_2 = load i64, ptr undef
%tr = trunc i64 %li64_2 to i32
add i32 %tr, undef
; Sign-extended loads
- %li16_0 = load i16, i16* undef
+ %li16_0 = load i16, ptr undef
%sext_0 = sext i16 %li16_0 to i32
add i32 %sext_0, undef
- %li16_1 = load i16, i16* undef
+ %li16_1 = load i16, ptr undef
%sext_1 = sext i16 %li16_1 to i64
add i64 %sext_1, undef
- %li32_2 = load i32, i32* undef
+ %li32_2 = load i32, ptr undef
%sext_2 = sext i32 %li32_2 to i64
add i64 %sext_2, undef
; Zero-extended loads
- %li32_3 = load i32, i32* undef
+ %li32_3 = load i32, ptr undef
%zext_0 = zext i32 %li32_3 to i64
add i64 %zext_0, undef
; Loads with multiple uses are *not* folded
- %li16_3 = load i16, i16* undef
+ %li16_3 = load i16, ptr undef
%sext_3 = sext i16 %li16_3 to i32
%sext_4 = sext i16 %li16_3 to i32
add i32 %sext_3, undef
ret void;
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = add i32 %li32, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = add i32 %li32_0, %li32_1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = add i64 %li64, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = add i64 %li64_0, %li64_1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = add i32 %tr, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li16_0 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li16_0 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_0 = sext i16 %li16_0 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = add i32 %sext_0, undef
-; Z13: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, i16* undef
-; Z14: Cost Model: Found an estimated cost of 0 for instruction: %li16_1 = load i16, i16* undef
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, ptr undef
+; Z14: Cost Model: Found an estimated cost of 0 for instruction: %li16_1 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i16 %li16_1 to i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = add i64 %sext_1, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_2 to i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = add i64 %sext_2, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_3 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_3 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %zext_0 = zext i32 %li32_3 to i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = add i64 %zext_0, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_3 = sext i16 %li16_3 to i32
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_4 = sext i16 %li16_3 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = add i32 %sext_3, undef
}
-define void @add_i16_mem16(i16 %Arg, i16* %Src1, i16* %Src2, i16* %Dst, i32* %Src32) {
- %L1 = load i16, i16* %Src1
+define void @add_i16_mem16(i16 %Arg, ptr %Src1, ptr %Src2, ptr %Dst, ptr %Src32) {
+ %L1 = load i16, ptr %Src1
%S0 = add i16 %L1, %Arg
- store volatile i16 %S0, i16* %Dst
+ store volatile i16 %S0, ptr %Dst
- %L2 = load i16, i16* %Src1
- %L3 = load i16, i16* %Src2
+ %L2 = load i16, ptr %Src1
+ %L3 = load i16, ptr %Src2
%S1 = add i16 %L2, %L3
- store volatile i16 %S1, i16* %Dst
+ store volatile i16 %S1, ptr %Dst
; Truncated load
- %L32 = load i32, i32* %Src32
+ %L32 = load i32, ptr %Src32
%tr = trunc i32 %L32 to i16
%S2 = add i16 %tr, %Arg
- store volatile i16 %S2, i16* %Dst
+ store volatile i16 %S2, ptr %Dst
ret void
; CHECK: function 'add_i16_mem16'
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L1 = load i16, i16* %Src1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L1 = load i16, ptr %Src1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %S0 = add i16 %L1, %Arg
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %S0, i16* %Dst
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L2 = load i16, i16* %Src1
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %L3 = load i16, i16* %Src2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %S0, ptr %Dst
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L2 = load i16, ptr %Src1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %L3 = load i16, ptr %Src2
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %S1 = add i16 %L2, %L3
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %S1, i16* %Dst
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L32 = load i32, i32* %Src32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %S1, ptr %Dst
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L32 = load i32, ptr %Src32
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i32 %L32 to i16
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %S2 = add i16 %tr, %Arg
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %S2, i16* %Dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %S2, ptr %Dst
}
define void @sub_lhs_mem() {
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
sub i32 %li32, undef
- %li32_0 = load i32, i32* undef
- %li32_1 = load i32, i32* undef
+ %li32_0 = load i32, ptr undef
+ %li32_1 = load i32, ptr undef
sub i32 %li32_0, %li32_1
- %li64 = load i64, i64* undef
+ %li64 = load i64, ptr undef
sub i64 %li64, undef
- %li64_0 = load i64, i64* undef
- %li64_1 = load i64, i64* undef
+ %li64_0 = load i64, ptr undef
+ %li64_1 = load i64, ptr undef
sub i64 %li64_0, %li64_1
; Truncated load
- %li64_2 = load i64, i64* undef
+ %li64_2 = load i64, ptr undef
%tr = trunc i64 %li64_2 to i32
sub i32 %tr, undef
; Sign-extended loads
- %li16_0 = load i16, i16* undef
+ %li16_0 = load i16, ptr undef
%sext_0 = sext i16 %li16_0 to i32
sub i32 %sext_0, undef
- %li16_1 = load i16, i16* undef
+ %li16_1 = load i16, ptr undef
%sext_1 = sext i16 %li16_1 to i64
sub i64 %sext_1, undef
- %li32_2 = load i32, i32* undef
+ %li32_2 = load i32, ptr undef
%sext_2 = sext i32 %li32_2 to i64
sub i64 %sext_2, undef
; Zero-extended loads
- %li32_3 = load i32, i32* undef
+ %li32_3 = load i32, ptr undef
%zext_0 = zext i32 %li32_3 to i64
sub i64 %zext_0, undef
; Loads with multiple uses are *not* folded
- %li16_3 = load i16, i16* undef
+ %li16_3 = load i16, ptr undef
%sext_3 = sext i16 %li16_3 to i32
%sext_4 = sext i16 %li16_3 to i32
sub i32 %sext_3, undef
ret void;
; A sub LHS loaded operand is *not* foldable.
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = sub i32 %li32, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_0 = load i32, i32* undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_1 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_0 = load i32, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_1 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = sub i32 %li32_0, %li32_1
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = sub i64 %li64, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_0 = load i64, i64* undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_1 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_0 = load i64, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_1 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = sub i64 %li64_0, %li64_1
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_2 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = sub i32 %tr, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_0 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_0 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_0 = sext i16 %li16_0 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = sub i32 %sext_0, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i16 %li16_1 to i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = sub i64 %sext_1, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_2 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_2 to i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = sub i64 %sext_2, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_3 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_3 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %zext_0 = zext i32 %li32_3 to i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = sub i64 %zext_0, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_3 = sext i16 %li16_3 to i32
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_4 = sext i16 %li16_3 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = sub i32 %sext_3, undef
}
define void @sub_rhs_mem() {
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
sub i32 undef, %li32
- %li64 = load i64, i64* undef
+ %li64 = load i64, ptr undef
sub i64 undef, %li64
; Truncated load
- %li64_2 = load i64, i64* undef
+ %li64_2 = load i64, ptr undef
%tr = trunc i64 %li64_2 to i32
sub i32 undef, %tr
; Sign-extended loads
- %li16_0 = load i16, i16* undef
+ %li16_0 = load i16, ptr undef
%sext_0 = sext i16 %li16_0 to i32
sub i32 undef, %sext_0
- %li16_1 = load i16, i16* undef
+ %li16_1 = load i16, ptr undef
%sext_1 = sext i16 %li16_1 to i64
sub i64 undef, %sext_1
- %li32_2 = load i32, i32* undef
+ %li32_2 = load i32, ptr undef
%sext_2 = sext i32 %li32_2 to i64
sub i64 undef, %sext_2
; Zero-extended loads
- %li32_3 = load i32, i32* undef
+ %li32_3 = load i32, ptr undef
%zext_0 = zext i32 %li32_3 to i64
sub i64 undef, %zext_0
; Loads with multiple uses are *not* folded
- %li16_3 = load i16, i16* undef
+ %li16_3 = load i16, ptr undef
%sext_3 = sext i16 %li16_3 to i32
%sext_4 = sext i16 %li16_3 to i32
sub i32 undef, %sext_3
ret void;
; A sub RHS loaded operand is foldable.
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = sub i32 undef, %li32
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = sub i64 undef, %li64
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = sub i32 undef, %tr
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li16_0 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li16_0 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_0 = sext i16 %li16_0 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = sub i32 undef, %sext_0
-; Z13: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, i16* undef
-; Z14: Cost Model: Found an estimated cost of 0 for instruction: %li16_1 = load i16, i16* undef
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, ptr undef
+; Z14: Cost Model: Found an estimated cost of 0 for instruction: %li16_1 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i16 %li16_1 to i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = sub i64 undef, %sext_1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_2 to i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = sub i64 undef, %sext_2
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_3 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_3 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %zext_0 = zext i32 %li32_3 to i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = sub i64 undef, %zext_0
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_3 = sext i16 %li16_3 to i32
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_4 = sext i16 %li16_3 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = sub i32 undef, %sext_3
}
-define void @sub_i16_mem16(i16 %Arg, i16* %Src1, i16* %Src2, i16* %Dst, i32* %Src32) {
- %L1 = load i16, i16* %Src1
+define void @sub_i16_mem16(i16 %Arg, ptr %Src1, ptr %Src2, ptr %Dst, ptr %Src32) {
+ %L1 = load i16, ptr %Src1
%D0 = sub i16 %Arg, %L1
- store volatile i16 %D0, i16* %Dst
+ store volatile i16 %D0, ptr %Dst
- %L2 = load i16, i16* %Src1
- %L3 = load i16, i16* %Src2
+ %L2 = load i16, ptr %Src1
+ %L3 = load i16, ptr %Src2
%D1 = sub i16 %L2, %L3
- store volatile i16 %D1, i16* %Dst
+ store volatile i16 %D1, ptr %Dst
; Truncated load
- %L32 = load i32, i32* %Src32
+ %L32 = load i32, ptr %Src32
%tr = trunc i32 %L32 to i16
%D2 = sub i16 %Arg, %tr
- store volatile i16 %D2, i16* %Dst
+ store volatile i16 %D2, ptr %Dst
ret void
; CHECK: function 'sub_i16_mem16'
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L1 = load i16, i16* %Src1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L1 = load i16, ptr %Src1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %D0 = sub i16 %Arg, %L1
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %D0, i16* %Dst
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %L2 = load i16, i16* %Src1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L3 = load i16, i16* %Src2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %D0, ptr %Dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %L2 = load i16, ptr %Src1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L3 = load i16, ptr %Src2
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sub i16 %L2, %L3
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %D1, i16* %Dst
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L32 = load i32, i32* %Src32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %D1, ptr %Dst
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L32 = load i32, ptr %Src32
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i32 %L32 to i16
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sub i16 %Arg, %tr
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %D2, i16* %Dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %D2, ptr %Dst
}
define void @mul() {
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
mul i32 %li32, undef
- %li32_0 = load i32, i32* undef
- %li32_1 = load i32, i32* undef
+ %li32_0 = load i32, ptr undef
+ %li32_1 = load i32, ptr undef
mul i32 %li32_0, %li32_1
- %li64 = load i64, i64* undef
+ %li64 = load i64, ptr undef
mul i64 %li64, undef
- %li64_0 = load i64, i64* undef
- %li64_1 = load i64, i64* undef
+ %li64_0 = load i64, ptr undef
+ %li64_1 = load i64, ptr undef
mul i64 %li64_0, %li64_1
; Truncated load
- %li64_2 = load i64, i64* undef
+ %li64_2 = load i64, ptr undef
%tr = trunc i64 %li64_2 to i32
mul i32 %tr, undef
; Sign-extended loads
- %li16_0 = load i16, i16* undef
+ %li16_0 = load i16, ptr undef
%sext_0 = sext i16 %li16_0 to i32
mul i32 %sext_0, undef
- %li16_1 = load i16, i16* undef
+ %li16_1 = load i16, ptr undef
%sext_1 = sext i16 %li16_1 to i64
mul i64 %sext_1, undef
- %li32_2 = load i32, i32* undef
+ %li32_2 = load i32, ptr undef
%sext_2 = sext i32 %li32_2 to i64
mul i64 %sext_2, undef
; Zero-extended loads are *not* folded
- %li16_2 = load i16, i16* undef
+ %li16_2 = load i16, ptr undef
%zext_0 = zext i16 %li16_2 to i32
mul i32 %zext_0, undef
; Loads with multiple uses are *not* folded
- %li16_3 = load i16, i16* undef
+ %li16_3 = load i16, ptr undef
%sext_3 = sext i16 %li16_3 to i32
%sext_4 = sext i16 %li16_3 to i32
mul i32 %sext_3, undef
ret void;
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = mul i32 %li32, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = mul i32 %li32_0, %li32_1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = mul i64 %li64, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = mul i64 %li64_0, %li64_1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = mul i32 %tr, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li16_0 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li16_0 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_0 = sext i16 %li16_0 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = mul i32 %sext_0, undef
-; Z13: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, i16* undef
-; Z14: Cost Model: Found an estimated cost of 0 for instruction: %li16_1 = load i16, i16* undef
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %li16_1 = load i16, ptr undef
+; Z14: Cost Model: Found an estimated cost of 0 for instruction: %li16_1 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i16 %li16_1 to i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = mul i64 %sext_1, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_2 to i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = mul i64 %sext_2, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_2 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_2 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %zext_0 = zext i16 %li16_2 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %9 = mul i32 %zext_0, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, i16* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li16_3 = load i16, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_3 = sext i16 %li16_3 to i32
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_4 = sext i16 %li16_3 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %10 = mul i32 %sext_3, undef
}
-define void @mul_i16_mem16(i16 %Arg, i16* %Src1, i16* %Src2, i16* %Dst, i32* %Src32) {
- %L1 = load i16, i16* %Src1
+define void @mul_i16_mem16(i16 %Arg, ptr %Src1, ptr %Src2, ptr %Dst, ptr %Src32) {
+ %L1 = load i16, ptr %Src1
%P0 = mul i16 %Arg, %L1
- store volatile i16 %P0, i16* %Dst
+ store volatile i16 %P0, ptr %Dst
- %L2 = load i16, i16* %Src1
- %L3 = load i16, i16* %Src2
+ %L2 = load i16, ptr %Src1
+ %L3 = load i16, ptr %Src2
%P1 = mul i16 %L2, %L3
- store volatile i16 %P1, i16* %Dst
+ store volatile i16 %P1, ptr %Dst
; Truncated load
- %L32 = load i32, i32* %Src32
+ %L32 = load i32, ptr %Src32
%tr = trunc i32 %L32 to i16
%P2 = mul i16 %Arg, %tr
- store volatile i16 %P2, i16* %Dst
+ store volatile i16 %P2, ptr %Dst
ret void
; CHECK: function 'mul_i16_mem16'
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L1 = load i16, i16* %Src1
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L1 = load i16, ptr %Src1
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %P0 = mul i16 %Arg, %L1
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %P0, i16* %Dst
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L2 = load i16, i16* %Src1
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %L3 = load i16, i16* %Src2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %P0, ptr %Dst
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L2 = load i16, ptr %Src1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %L3 = load i16, ptr %Src2
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %P1 = mul i16 %L2, %L3
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %P1, i16* %Dst
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L32 = load i32, i32* %Src32
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %P1, ptr %Dst
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %L32 = load i32, ptr %Src32
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i32 %L32 to i16
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %P2 = mul i16 %Arg, %tr
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %P2, i16* %Dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: store volatile i16 %P2, ptr %Dst
}
define void @sdiv_lhs(i32 %arg32, i64 %arg64) {
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
sdiv i32 %li32, %arg32
- %li32_0 = load i32, i32* undef
- %li32_1 = load i32, i32* undef
+ %li32_0 = load i32, ptr undef
+ %li32_1 = load i32, ptr undef
sdiv i32 %li32_0, %li32_1
- %li64 = load i64, i64* undef
+ %li64 = load i64, ptr undef
sdiv i64 %li64, %arg64
- %li64_0 = load i64, i64* undef
- %li64_1 = load i64, i64* undef
+ %li64_0 = load i64, ptr undef
+ %li64_1 = load i64, ptr undef
sdiv i64 %li64_0, %li64_1
; Truncated load
- %li64_2 = load i64, i64* undef
+ %li64_2 = load i64, ptr undef
%tr = trunc i64 %li64_2 to i32
sdiv i32 %tr, undef
; Sign-extended loads
- %li32_2 = load i32, i32* undef
+ %li32_2 = load i32, ptr undef
%sext_0 = sext i32 %li32_2 to i64
sdiv i64 %sext_0, undef
; Loads with multiple uses are *not* folded
- %li32_3 = load i32, i32* undef
+ %li32_3 = load i32, ptr undef
%sext_1 = sext i32 %li32_3 to i64
%sext_2 = sext i32 %li32_3 to i64
sdiv i64 %sext_1, undef
ret void;
; An sdiv loaded dividend (lhs) operand is *not* foldable.
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %1 = sdiv i32 %li32, %arg32
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_0 = load i32, i32* undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_1 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_0 = load i32, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_1 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %2 = sdiv i32 %li32_0, %li32_1
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %3 = sdiv i64 %li64, %arg64
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_0 = load i64, i64* undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_1 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_0 = load i64, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_1 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %4 = sdiv i64 %li64_0, %li64_1
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_2 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %5 = sdiv i32 %tr, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_2 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_0 = sext i32 %li32_2 to i64
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %6 = sdiv i64 %sext_0, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_3 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_3 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i32 %li32_3 to i64
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_3 to i64
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %7 = sdiv i64 %sext_1, undef
}
define void @sdiv_rhs(i32 %arg32, i64 %arg64) {
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
sdiv i32 %arg32, %li32
- %li64 = load i64, i64* undef
+ %li64 = load i64, ptr undef
sdiv i64 %arg64, %li64
; Truncated load
- %li64_2 = load i64, i64* undef
+ %li64_2 = load i64, ptr undef
%tr = trunc i64 %li64_2 to i32
sdiv i32 undef, %tr
; Sign-extended loads
- %li32_2 = load i32, i32* undef
+ %li32_2 = load i32, ptr undef
%sext_0 = sext i32 %li32_2 to i64
sdiv i64 undef, %sext_0
; Loads with multiple uses are *not* folded
- %li32_3 = load i32, i32* undef
+ %li32_3 = load i32, ptr undef
%sext_1 = sext i32 %li32_3 to i64
%sext_2 = sext i32 %li32_3 to i64
sdiv i64 undef, %sext_1
ret void;
; An sdiv loaded divisor (rhs) operand is foldable.
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %1 = sdiv i32 %arg32, %li32
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %2 = sdiv i64 %arg64, %li64
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr = trunc i64 %li64_2 to i32
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %3 = sdiv i32 undef, %tr
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_0 = sext i32 %li32_2 to i64
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %4 = sdiv i64 undef, %sext_0
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_3 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_3 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_1 = sext i32 %li32_3 to i64
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext_2 = sext i32 %li32_3 to i64
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %5 = sdiv i64 undef, %sext_1
}
define void @udiv_lhs(i32 %arg32, i64 %arg64) {
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
udiv i32 %li32, %arg32
- %li32_0 = load i32, i32* undef
- %li32_1 = load i32, i32* undef
+ %li32_0 = load i32, ptr undef
+ %li32_1 = load i32, ptr undef
udiv i32 %li32_0, %li32_1
- %li64 = load i64, i64* undef
+ %li64 = load i64, ptr undef
udiv i64 %li64, %arg64
- %li64_0 = load i64, i64* undef
- %li64_1 = load i64, i64* undef
+ %li64_0 = load i64, ptr undef
+ %li64_1 = load i64, ptr undef
udiv i64 %li64_0, %li64_1
; Truncated load
- %li64_2 = load i64, i64* undef
+ %li64_2 = load i64, ptr undef
%tr_0 = trunc i64 %li64_2 to i32
udiv i32 %tr_0, undef
; Loads with multiple uses are *not* folded
- %li64_3 = load i64, i64* undef
+ %li64_3 = load i64, ptr undef
%tr_1 = trunc i64 %li64_3 to i32
udiv i64 %li64_3, undef
ret void;
; An udiv loaded dividend (lhs) operand is *not* foldable.
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %1 = udiv i32 %li32, %arg32
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_0 = load i32, i32* undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_1 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_0 = load i32, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_1 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %2 = udiv i32 %li32_0, %li32_1
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %3 = udiv i64 %li64, %arg64
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_0 = load i64, i64* undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_1 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_0 = load i64, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_1 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %4 = udiv i64 %li64_0, %li64_1
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_2 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %5 = udiv i32 %tr_0, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %6 = udiv i64 %li64_3, undef
}
define void @udiv_rhs(i32 %arg32, i64 %arg64) {
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
udiv i32 %arg32, %li32
- %li64 = load i64, i64* undef
+ %li64 = load i64, ptr undef
udiv i64 %arg64, %li64
; Truncated load
- %li64_2 = load i64, i64* undef
+ %li64_2 = load i64, ptr undef
%tr_0 = trunc i64 %li64_2 to i32
udiv i32 undef, %tr_0
; Loads with multiple uses are *not* folded
- %li64_3 = load i64, i64* undef
+ %li64_3 = load i64, ptr undef
%tr_1 = trunc i64 %li64_3 to i32
udiv i64 undef, %li64_3
ret void;
; An udiv loaded divisor (rhs) operand is foldable.
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %1 = udiv i32 %arg32, %li32
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %2 = udiv i64 %arg64, %li64
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %3 = udiv i32 undef, %tr_0
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %4 = udiv i64 undef, %li64_3
}
define void @and() {
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
and i32 %li32, undef
- %li32_0 = load i32, i32* undef
- %li32_1 = load i32, i32* undef
+ %li32_0 = load i32, ptr undef
+ %li32_1 = load i32, ptr undef
and i32 %li32_0, %li32_1
- %li64 = load i64, i64* undef
+ %li64 = load i64, ptr undef
and i64 %li64, undef
- %li64_0 = load i64, i64* undef
- %li64_1 = load i64, i64* undef
+ %li64_0 = load i64, ptr undef
+ %li64_1 = load i64, ptr undef
and i64 %li64_0, %li64_1
; Truncated load
- %li64_2 = load i64, i64* undef
+ %li64_2 = load i64, ptr undef
%tr_0 = trunc i64 %li64_2 to i32
and i32 %tr_0, undef
; Loads with multiple uses are *not* folded
- %li64_3 = load i64, i64* undef
+ %li64_3 = load i64, ptr undef
%tr_1 = trunc i64 %li64_3 to i32
and i64 %li64_3, undef
ret void;
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = and i32 %li32, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = and i32 %li32_0, %li32_1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = and i64 %li64, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = and i64 %li64_0, %li64_1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = and i32 %tr_0, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = and i64 %li64_3, undef
}
define void @or() {
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
or i32 %li32, undef
- %li32_0 = load i32, i32* undef
- %li32_1 = load i32, i32* undef
+ %li32_0 = load i32, ptr undef
+ %li32_1 = load i32, ptr undef
or i32 %li32_0, %li32_1
- %li64 = load i64, i64* undef
+ %li64 = load i64, ptr undef
or i64 %li64, undef
- %li64_0 = load i64, i64* undef
- %li64_1 = load i64, i64* undef
+ %li64_0 = load i64, ptr undef
+ %li64_1 = load i64, ptr undef
or i64 %li64_0, %li64_1
; Truncated load
- %li64_2 = load i64, i64* undef
+ %li64_2 = load i64, ptr undef
%tr_0 = trunc i64 %li64_2 to i32
or i32 %tr_0, undef
; Loads with multiple uses are *not* folded
- %li64_3 = load i64, i64* undef
+ %li64_3 = load i64, ptr undef
%tr_1 = trunc i64 %li64_3 to i32
or i64 %li64_3, undef
ret void;
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = or i32 %li32, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = or i32 %li32_0, %li32_1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = or i64 %li64, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = or i64 %li64_0, %li64_1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = or i32 %tr_0, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = or i64 %li64_3, undef
}
define void @xor() {
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
xor i32 %li32, undef
- %li32_0 = load i32, i32* undef
- %li32_1 = load i32, i32* undef
+ %li32_0 = load i32, ptr undef
+ %li32_1 = load i32, ptr undef
xor i32 %li32_0, %li32_1
- %li64 = load i64, i64* undef
+ %li64 = load i64, ptr undef
xor i64 %li64, undef
- %li64_0 = load i64, i64* undef
- %li64_1 = load i64, i64* undef
+ %li64_0 = load i64, ptr undef
+ %li64_1 = load i64, ptr undef
xor i64 %li64_0, %li64_1
; Truncated load
- %li64_2 = load i64, i64* undef
+ %li64_2 = load i64, ptr undef
%tr_0 = trunc i64 %li64_2 to i32
xor i32 %tr_0, undef
; Loads with multiple uses are *not* folded
- %li64_3 = load i64, i64* undef
+ %li64_3 = load i64, ptr undef
%tr_1 = trunc i64 %li64_3 to i32
xor i64 %li64_3, undef
ret void;
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = xor i32 %li32, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = xor i32 %li32_0, %li32_1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = xor i64 %li64, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = xor i64 %li64_0, %li64_1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = xor i32 %tr_0, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = xor i64 %li64_3, undef
}
define void @icmp() {
- %li32 = load i32, i32* undef
+ %li32 = load i32, ptr undef
icmp eq i32 %li32, undef
- %li32_0 = load i32, i32* undef
- %li32_1 = load i32, i32* undef
+ %li32_0 = load i32, ptr undef
+ %li32_1 = load i32, ptr undef
icmp eq i32 %li32_0, %li32_1
- %li64 = load i64, i64* undef
+ %li64 = load i64, ptr undef
icmp eq i64 %li64, undef
- %li64_0 = load i64, i64* undef
- %li64_1 = load i64, i64* undef
+ %li64_0 = load i64, ptr undef
+ %li64_1 = load i64, ptr undef
icmp eq i64 %li64_0, %li64_1
; Truncated load
- %li64_2 = load i64, i64* undef
+ %li64_2 = load i64, ptr undef
%tr_0 = trunc i64 %li64_2 to i32
icmp eq i32 %tr_0, undef
; Sign-extended load
- %li32_2 = load i32, i32* undef
+ %li32_2 = load i32, ptr undef
%sext = sext i32 %li32_2 to i64
icmp eq i64 %sext, undef
; Zero-extended load
- %li32_3 = load i32, i32* undef
+ %li32_3 = load i32, ptr undef
%zext = zext i32 %li32_3 to i64
icmp eq i64 %zext, undef
; Loads with multiple uses are *not* folded
- %li64_3 = load i64, i64* undef
+ %li64_3 = load i64, ptr undef
%tr_1 = trunc i64 %li64_3 to i32
icmp eq i64 %li64_3, undef
ret void;
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = icmp eq i32 %li32, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, i32* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_0 = load i32, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li32_1 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = icmp eq i32 %li32_0, %li32_1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = icmp eq i64 %li64, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, i64* undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_0 = load i64, ptr undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_1 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = icmp eq i64 %li64_0, %li64_1
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li64_2 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_0 = trunc i64 %li64_2 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %5 = icmp eq i32 %tr_0, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_2 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i32 %li32_2 to i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %6 = icmp eq i64 %sext, undef
-; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_3 = load i32, i32* undef
+; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %li32_3 = load i32, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %zext = zext i32 %li32_3 to i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %7 = icmp eq i64 %zext, undef
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, i64* undef
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %li64_3 = load i64, ptr undef
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %tr_1 = trunc i64 %li64_3 to i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %8 = icmp eq i64 %li64_3, undef
}
; Check that some costs can be returned for vector instructions also without
; vector support.
-define void @fun(<2 x double>* %arg) {
+define void @fun(ptr %arg) {
entry:
%add = fadd <2 x double> undef, undef
shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
declare i64 @llvm.bswap.i64(i64)
declare i128 @llvm.bswap.i128(i128)
-define void @var_bswap_store_i16(i16 %a, i16* %dst) {
+define void @var_bswap_store_i16(i16 %a, ptr %dst) {
; X64-LABEL: 'var_bswap_store_i16'
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, ptr %dst, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X64-MOVBE-LABEL: 'var_bswap_store_i16'
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, ptr %dst, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X64-FASTMOVBE-LABEL: 'var_bswap_store_i16'
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, ptr %dst, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-LABEL: 'var_bswap_store_i16'
; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, ptr %dst, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-MOVBE-LABEL: 'var_bswap_store_i16'
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, ptr %dst, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-FASTMOVBE-LABEL: 'var_bswap_store_i16'
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, ptr %dst, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i16 @llvm.bswap.i16(i16 %a)
- store i16 %bswap, i16* %dst, align 1
+ store i16 %bswap, ptr %dst, align 1
ret void
}
-define void @var_bswap_store_i16_extrause(i16 %a, i16* %dst) {
+define void @var_bswap_store_i16_extrause(i16 %a, ptr %dst) {
; ALL-LABEL: 'var_bswap_store_i16_extrause'
; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, ptr %dst, align 1
; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap2 = shl i16 %bswap, 2
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i16 @llvm.bswap.i16(i16 %a)
- store i16 %bswap, i16* %dst, align 1
+ store i16 %bswap, ptr %dst, align 1
%bswap2 = shl i16 %bswap, 2 ; incur an extra use to the bswap
ret void
}
-define void @var_bswap_store_i32(i32 %a, i32* %dst) {
+define void @var_bswap_store_i32(i32 %a, ptr %dst) {
; X64-LABEL: 'var_bswap_store_i32'
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
-; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, ptr %dst, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X64-MOVBE-LABEL: 'var_bswap_store_i32'
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, ptr %dst, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X64-FASTMOVBE-LABEL: 'var_bswap_store_i32'
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, ptr %dst, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-LABEL: 'var_bswap_store_i32'
; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
-; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, ptr %dst, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-MOVBE-LABEL: 'var_bswap_store_i32'
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, ptr %dst, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-FASTMOVBE-LABEL: 'var_bswap_store_i32'
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, ptr %dst, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i32 @llvm.bswap.i32(i32 %a)
- store i32 %bswap, i32* %dst, align 1
+ store i32 %bswap, ptr %dst, align 1
ret void
}
-define void @var_bswap_store_i32_extrause(i32 %a, i32* %dst) {
+define void @var_bswap_store_i32_extrause(i32 %a, ptr %dst) {
; ALL-LABEL: 'var_bswap_store_i32_extrause'
; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, ptr %dst, align 1
; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap2 = shl i32 %bswap, 2
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i32 @llvm.bswap.i32(i32 %a)
- store i32 %bswap, i32* %dst, align 1
+ store i32 %bswap, ptr %dst, align 1
%bswap2 = shl i32 %bswap, 2 ; incur an extra use to the bswap
ret void
}
-define void @var_bswap_store_i64(i64 %a, i64* %dst) {
+define void @var_bswap_store_i64(i64 %a, ptr %dst) {
; X64-LABEL: 'var_bswap_store_i64'
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, i64* %dst, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, ptr %dst, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X64-MOVBE-LABEL: 'var_bswap_store_i64'
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, i64* %dst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, ptr %dst, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X64-FASTMOVBE-LABEL: 'var_bswap_store_i64'
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, i64* %dst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, ptr %dst, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-LABEL: 'var_bswap_store_i64'
; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, ptr %dst, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-MOVBE-LABEL: 'var_bswap_store_i64'
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, ptr %dst, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-FASTMOVBE-LABEL: 'var_bswap_store_i64'
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, ptr %dst, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i64 @llvm.bswap.i64(i64 %a)
- store i64 %bswap, i64* %dst, align 1
+ store i64 %bswap, ptr %dst, align 1
ret void
}
-define void @var_bswap_store_i64_extrause(i64 %a, i64* %dst) {
+define void @var_bswap_store_i64_extrause(i64 %a, ptr %dst) {
; X64-LABEL: 'var_bswap_store_i64_extrause'
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, i64* %dst, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, ptr %dst, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap2 = shl i64 %bswap, 2
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X64-MOVBE-LABEL: 'var_bswap_store_i64_extrause'
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, i64* %dst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, ptr %dst, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap2 = shl i64 %bswap, 2
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X64-FASTMOVBE-LABEL: 'var_bswap_store_i64_extrause'
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, i64* %dst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, ptr %dst, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap2 = shl i64 %bswap, 2
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-LABEL: 'var_bswap_store_i64_extrause'
; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, ptr %dst, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap2 = shl i64 %bswap, 2
; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-MOVBE-LABEL: 'var_bswap_store_i64_extrause'
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, ptr %dst, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap2 = shl i64 %bswap, 2
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-FASTMOVBE-LABEL: 'var_bswap_store_i64_extrause'
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, ptr %dst, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap2 = shl i64 %bswap, 2
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i64 @llvm.bswap.i64(i64 %a)
- store i64 %bswap, i64* %dst, align 1
+ store i64 %bswap, ptr %dst, align 1
%bswap2 = shl i64 %bswap, 2 ; incur an extra use to the bswap
ret void
}
-define void @var_bswap_store_i128(i128 %a, i128* %dst) {
+define void @var_bswap_store_i128(i128 %a, ptr %dst) {
; X64-LABEL: 'var_bswap_store_i128'
; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, i128* %dst, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, ptr %dst, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X64-MOVBE-LABEL: 'var_bswap_store_i128'
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, i128* %dst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, ptr %dst, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X64-FASTMOVBE-LABEL: 'var_bswap_store_i128'
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, i128* %dst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, ptr %dst, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-LABEL: 'var_bswap_store_i128'
; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, ptr %dst, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-MOVBE-LABEL: 'var_bswap_store_i128'
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, ptr %dst, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-FASTMOVBE-LABEL: 'var_bswap_store_i128'
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, ptr %dst, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i128 @llvm.bswap.i128(i128 %a)
- store i128 %bswap, i128* %dst, align 1
+ store i128 %bswap, ptr %dst, align 1
ret void
}
-define void @var_bswap_store_i128_extrause(i128 %a, i128* %dst) {
+define void @var_bswap_store_i128_extrause(i128 %a, ptr %dst) {
; X64-LABEL: 'var_bswap_store_i128_extrause'
; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, i128* %dst, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, ptr %dst, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap2 = shl i128 %bswap, 2
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X64-MOVBE-LABEL: 'var_bswap_store_i128_extrause'
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, i128* %dst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, ptr %dst, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap2 = shl i128 %bswap, 2
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X64-FASTMOVBE-LABEL: 'var_bswap_store_i128_extrause'
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, i128* %dst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, ptr %dst, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap2 = shl i128 %bswap, 2
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-LABEL: 'var_bswap_store_i128_extrause'
; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, ptr %dst, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap2 = shl i128 %bswap, 2
; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-MOVBE-LABEL: 'var_bswap_store_i128_extrause'
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, ptr %dst, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap2 = shl i128 %bswap, 2
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; X32-FASTMOVBE-LABEL: 'var_bswap_store_i128_extrause'
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, ptr %dst, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap2 = shl i128 %bswap, 2
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i128 @llvm.bswap.i128(i128 %a)
- store i128 %bswap, i128* %dst, align 1
+ store i128 %bswap, ptr %dst, align 1
%bswap2 = shl i128 %bswap, 2 ; incur an extra use to the bswap
; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %A1 = alloca i32, align 8
; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = alloca i64, i64 undef, align 8
; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef
-; LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load i64, i64* undef, align 4
-; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %BC = bitcast i8* undef to i32*
-; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I2P = inttoptr i64 undef to i8*
-; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %P2I = ptrtoint i8* undef to i64
+; LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load i64, ptr undef, align 4
+; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I2P = inttoptr i64 undef to ptr
+; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %P2I = ptrtoint ptr undef to i64
; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %TC = trunc i64 undef to i32
; LATENCY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void undef()
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %A1 = alloca i32, align 8
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A2 = alloca i64, i64 undef, align 8
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef
-; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i64, i64* undef, align 4
-; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %BC = bitcast i8* undef to i32*
-; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I2P = inttoptr i64 undef to i8*
-; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %P2I = ptrtoint i8* undef to i64
+; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i64, ptr undef, align 4
+; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I2P = inttoptr i64 undef to ptr
+; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %P2I = ptrtoint ptr undef to i64
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %TC = trunc i64 undef to i32
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void undef()
%A1 = alloca i32, align 8
%A2 = alloca i64, i64 undef, align 8
%I64 = add i64 undef, undef
- load i64, i64* undef, align 4
- %BC = bitcast i8* undef to i32*
- %I2P = inttoptr i64 undef to i8*
- %P2I = ptrtoint i8* undef to i64
+ load i64, ptr undef, align 4
+ %I2P = inttoptr i64 undef to ptr
+ %P2I = ptrtoint ptr undef to i64
%TC = trunc i64 undef to i32
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
call void undef()
define i32 @trivially_free() {
; CHECK-SIZE-LABEL: 'trivially_free'
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32.p0i8(i32 undef, i8* undef, i8* undef, i32 undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32.p0(i32 undef, ptr undef, ptr undef, i32 undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.assume(i1 undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.experimental.noalias.scope.decl(metadata !0)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.sideeffect()
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call ptr @llvm.invariant.start.p0(i64 1, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0(ptr undef, i64 1, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0i8(i64 1, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0i8(i64 1, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 true, i1 true, i1 true)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call i8* @llvm.ptr.annotation.p0i8.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0i8.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(i64 1, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(i64 1, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 true, i1 true, i1 true)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call ptr @llvm.ptr.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-THROUGHPUT-LABEL: 'trivially_free'
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32.p0i8(i32 undef, i8* undef, i8* undef, i32 undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32.p0(i32 undef, ptr undef, ptr undef, i32 undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.assume(i1 undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.experimental.noalias.scope.decl(metadata !0)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.sideeffect()
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call ptr @llvm.invariant.start.p0(i64 1, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0(ptr undef, i64 1, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0i8(i64 1, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0i8(i64 1, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 true, i1 true, i1 true)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call i8* @llvm.ptr.annotation.p0i8.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0i8.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(i64 1, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(i64 1, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 true, i1 true, i1 true)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call ptr @llvm.ptr.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- %a0 = call i32 @llvm.annotation.i32(i32 undef, i8* undef, i8* undef, i32 undef)
+ %a0 = call i32 @llvm.annotation.i32(i32 undef, ptr undef, ptr undef, i32 undef)
call void @llvm.assume(i1 undef)
call void @llvm.experimental.noalias.scope.decl(metadata !4)
call void @llvm.sideeffect()
- call void @llvm.dbg.declare(metadata i8** undef, metadata !0, metadata !DIExpression())
+ call void @llvm.dbg.declare(metadata ptr undef, metadata !0, metadata !DIExpression())
call void @llvm.dbg.value(metadata i64 undef, i64 undef, metadata !DIExpression(), metadata !DIExpression())
call void @llvm.dbg.label(metadata !2)
- %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef)
- call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef)
- %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef)
- %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef)
+ %a1 = call ptr @llvm.invariant.start.p0(i64 1, ptr undef)
+ call void @llvm.invariant.end.p0(ptr undef, i64 1, ptr undef)
+ %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
+ %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
%a4 = call i1 @llvm.is.constant.i32(i32 undef)
- call void @llvm.lifetime.start.p0i8(i64 1, i8* undef)
- call void @llvm.lifetime.end.p0i8(i64 1, i8* undef)
- %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 1, i1 1, i1 1)
- %a6 = call i8* @llvm.ptr.annotation.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
- call void @llvm.var.annotation(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
+ call void @llvm.lifetime.start.p0(i64 1, ptr undef)
+ call void @llvm.lifetime.end.p0(i64 1, ptr undef)
+ %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 1, i1 1, i1 1)
+ %a6 = call ptr @llvm.ptr.annotation.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
+ call void @llvm.var.annotation(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
ret i32 undef
}
-declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32)
+declare i32 @llvm.annotation.i32(i32, ptr, ptr, i32)
declare void @llvm.assume(i1)
declare void @llvm.experimental.noalias.scope.decl(metadata)
declare void @llvm.sideeffect()
declare void @llvm.dbg.declare(metadata, metadata, metadata)
declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
declare void @llvm.dbg.label(metadata)
-declare {}* @llvm.invariant.start.p0i8(i64, i8*)
-declare void @llvm.invariant.end.p0i8({}*, i64, i8*)
-declare i8* @llvm.launder.invariant.group.p0i8(i8*)
-declare i8* @llvm.strip.invariant.group.p0i8(i8*)
+declare ptr @llvm.invariant.start.p0(i64, ptr)
+declare void @llvm.invariant.end.p0(ptr, i64, ptr)
+declare ptr @llvm.launder.invariant.group.p0(ptr)
+declare ptr @llvm.strip.invariant.group.p0(ptr)
declare i1 @llvm.is.constant.i32(i32)
-declare void @llvm.lifetime.start.p0i8(i64, i8*)
-declare void @llvm.lifetime.end.p0i8(i64, i8*)
-declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)
-declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32, i8*)
-declare void @llvm.var.annotation(i8*, i8*, i8*, i32, i8*)
+declare void @llvm.lifetime.start.p0(i64, ptr)
+declare void @llvm.lifetime.end.p0(i64, ptr)
+declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
+declare ptr @llvm.ptr.annotation.p0(ptr, ptr, ptr, i32, ptr)
+declare void @llvm.var.annotation(ptr, ptr, ptr, i32, ptr)
!0 = !DILocalVariable(scope: !1)
define void @test() {
; SSE-LABEL: 'test'
-; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2
-; SSE: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2
-; SSE: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2
-; SSE: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2
-; SSE: LV: Found an estimated cost of 192 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2
+; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, ptr %inB, align 2
+; SSE: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i16, ptr %inB, align 2
+; SSE: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i16, ptr %inB, align 2
+; SSE: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i16, ptr %inB, align 2
+; SSE: LV: Found an estimated cost of 192 for VF 16 For instruction: %valB = load i16, ptr %inB, align 2
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX1: LV: Found an estimated cost of 193 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX1: LV: Found an estimated cost of 386 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX1: LV: Found an estimated cost of 193 for VF 16 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX1: LV: Found an estimated cost of 386 for VF 32 For instruction: %valB = load i16, ptr %inB, align 2
;
; AVX2-SLOWGATHER-LABEL: 'test'
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 8 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 16 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 33 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 66 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 8 for VF 4 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 16 for VF 8 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 33 for VF 16 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 66 for VF 32 For instruction: %valB = load i16, ptr %inB, align 2
;
; AVX2-FASTGATHER-LABEL: 'test'
-; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX2-FASTGATHER: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX2-FASTGATHER: LV: Found an estimated cost of 26 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX2-FASTGATHER: LV: Found an estimated cost of 53 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX2-FASTGATHER: LV: Found an estimated cost of 106 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2
+; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX2-FASTGATHER: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX2-FASTGATHER: LV: Found an estimated cost of 26 for VF 8 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX2-FASTGATHER: LV: Found an estimated cost of 53 for VF 16 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX2-FASTGATHER: LV: Found an estimated cost of 106 for VF 32 For instruction: %valB = load i16, ptr %inB, align 2
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX512: LV: Found an estimated cost of 55 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX512: LV: Found an estimated cost of 111 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2
-; AVX512: LV: Found an estimated cost of 222 for VF 64 For instruction: %valB = load i16, i16* %inB, align 2
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX512: LV: Found an estimated cost of 55 for VF 16 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX512: LV: Found an estimated cost of 111 for VF 32 For instruction: %valB = load i16, ptr %inB, align 2
+; AVX512: LV: Found an estimated cost of 222 for VF 64 For instruction: %valB = load i16, ptr %inB, align 2
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%valA.ext = sext i8 %valA to i64
- %inB = getelementptr inbounds [1024 x i16], [1024 x i16]* @B, i64 0, i64 %valA.ext
- %valB = load i16, i16* %inB
+ %inB = getelementptr inbounds [1024 x i16], ptr @B, i64 0, i64 %valA.ext
+ %valB = load i16, ptr %inB
- %out = getelementptr inbounds [1024 x i16], [1024 x i16]* @C, i64 0, i64 %iv
- store i16 %valB, i16* %out
+ %out = getelementptr inbounds [1024 x i16], ptr @C, i64 0, i64 %iv
+ store i16 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4
-; SSE2: LV: Found an estimated cost of 25 for VF 2 For instruction: %valB = load i32, i32* %inB, align 4
-; SSE2: LV: Found an estimated cost of 51 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4
-; SSE2: LV: Found an estimated cost of 102 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4
-; SSE2: LV: Found an estimated cost of 204 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, ptr %inB, align 4
+; SSE2: LV: Found an estimated cost of 25 for VF 2 For instruction: %valB = load i32, ptr %inB, align 4
+; SSE2: LV: Found an estimated cost of 51 for VF 4 For instruction: %valB = load i32, ptr %inB, align 4
+; SSE2: LV: Found an estimated cost of 102 for VF 8 For instruction: %valB = load i32, ptr %inB, align 4
+; SSE2: LV: Found an estimated cost of 204 for VF 16 For instruction: %valB = load i32, ptr %inB, align 4
;
; SSE42-LABEL: 'test'
-; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4
-; SSE42: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i32, i32* %inB, align 4
-; SSE42: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4
-; SSE42: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4
-; SSE42: LV: Found an estimated cost of 192 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4
+; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, ptr %inB, align 4
+; SSE42: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i32, ptr %inB, align 4
+; SSE42: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i32, ptr %inB, align 4
+; SSE42: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i32, ptr %inB, align 4
+; SSE42: LV: Found an estimated cost of 192 for VF 16 For instruction: %valB = load i32, ptr %inB, align 4
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 97 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 194 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 388 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 97 for VF 8 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 194 for VF 16 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 388 for VF 32 For instruction: %valB = load i32, ptr %inB, align 4
;
; AVX2-SLOWGATHER-LABEL: 'test'
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 8 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 17 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 34 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 68 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 8 for VF 4 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 17 for VF 8 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 34 for VF 16 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 68 for VF 32 For instruction: %valB = load i32, ptr %inB, align 4
;
; AVX2-FASTGATHER-LABEL: 'test'
-; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX2-FASTGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX2-FASTGATHER: LV: Found an estimated cost of 12 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX2-FASTGATHER: LV: Found an estimated cost of 24 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX2-FASTGATHER: LV: Found an estimated cost of 48 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4
+; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX2-FASTGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 4 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX2-FASTGATHER: LV: Found an estimated cost of 12 for VF 8 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX2-FASTGATHER: LV: Found an estimated cost of 24 for VF 16 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX2-FASTGATHER: LV: Found an estimated cost of 48 for VF 32 For instruction: %valB = load i32, ptr %inB, align 4
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 72 for VF 64 For instruction: %valB = load i32, i32* %inB, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: %valB = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 72 for VF 64 For instruction: %valB = load i32, ptr %inB, align 4
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%valA.ext = sext i8 %valA to i64
- %inB = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %valA.ext
- %valB = load i32, i32* %inB
+ %inB = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %valA.ext
+ %valB = load i32, ptr %inB
- %out = getelementptr inbounds [1024 x i32], [1024 x i32]* @C, i64 0, i64 %iv
- store i32 %valB, i32* %out
+ %out = getelementptr inbounds [1024 x i32], ptr @C, i64 0, i64 %iv
+ store i32 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8
-; SSE2: LV: Found an estimated cost of 25 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8
-; SSE2: LV: Found an estimated cost of 50 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8
-; SSE2: LV: Found an estimated cost of 100 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8
-; SSE2: LV: Found an estimated cost of 200 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, ptr %inB, align 8
+; SSE2: LV: Found an estimated cost of 25 for VF 2 For instruction: %valB = load i64, ptr %inB, align 8
+; SSE2: LV: Found an estimated cost of 50 for VF 4 For instruction: %valB = load i64, ptr %inB, align 8
+; SSE2: LV: Found an estimated cost of 100 for VF 8 For instruction: %valB = load i64, ptr %inB, align 8
+; SSE2: LV: Found an estimated cost of 200 for VF 16 For instruction: %valB = load i64, ptr %inB, align 8
;
; SSE42-LABEL: 'test'
-; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8
-; SSE42: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8
-; SSE42: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8
-; SSE42: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8
-; SSE42: LV: Found an estimated cost of 192 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8
+; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, ptr %inB, align 8
+; SSE42: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i64, ptr %inB, align 8
+; SSE42: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i64, ptr %inB, align 8
+; SSE42: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i64, ptr %inB, align 8
+; SSE42: LV: Found an estimated cost of 192 for VF 16 For instruction: %valB = load i64, ptr %inB, align 8
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 49 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 196 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 392 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 49 for VF 4 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 196 for VF 16 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 392 for VF 32 For instruction: %valB = load i64, ptr %inB, align 8
;
; AVX2-SLOWGATHER-LABEL: 'test'
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 9 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 18 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 36 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 72 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 9 for VF 4 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 18 for VF 8 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 36 for VF 16 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 72 for VF 32 For instruction: %valB = load i64, ptr %inB, align 8
;
; AVX2-FASTGATHER-LABEL: 'test'
-; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX2-FASTGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX2-FASTGATHER: LV: Found an estimated cost of 12 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX2-FASTGATHER: LV: Found an estimated cost of 24 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX2-FASTGATHER: LV: Found an estimated cost of 48 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8
+; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX2-FASTGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 4 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX2-FASTGATHER: LV: Found an estimated cost of 12 for VF 8 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX2-FASTGATHER: LV: Found an estimated cost of 24 for VF 16 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX2-FASTGATHER: LV: Found an estimated cost of 48 for VF 32 For instruction: %valB = load i64, ptr %inB, align 8
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %valB = load i64, i64* %inB, align 8
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %valB = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %valB = load i64, ptr %inB, align 8
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%valA.ext = sext i8 %valA to i64
- %inB = getelementptr inbounds [1024 x i64], [1024 x i64]* @B, i64 0, i64 %valA.ext
- %valB = load i64, i64* %inB
+ %inB = getelementptr inbounds [1024 x i64], ptr @B, i64 0, i64 %valA.ext
+ %valB = load i64, ptr %inB
- %out = getelementptr inbounds [1024 x i64], [1024 x i64]* @C, i64 0, i64 %iv
- store i64 %valB, i64* %out
+ %out = getelementptr inbounds [1024 x i64], ptr @C, i64 0, i64 %iv
+ store i64 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1
-; SSE2: LV: Found an estimated cost of 25 for VF 2 For instruction: %valB = load i8, i8* %inB, align 1
-; SSE2: LV: Found an estimated cost of 51 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1
-; SSE2: LV: Found an estimated cost of 103 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1
-; SSE2: LV: Found an estimated cost of 207 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, ptr %inB, align 1
+; SSE2: LV: Found an estimated cost of 25 for VF 2 For instruction: %valB = load i8, ptr %inB, align 1
+; SSE2: LV: Found an estimated cost of 51 for VF 4 For instruction: %valB = load i8, ptr %inB, align 1
+; SSE2: LV: Found an estimated cost of 103 for VF 8 For instruction: %valB = load i8, ptr %inB, align 1
+; SSE2: LV: Found an estimated cost of 207 for VF 16 For instruction: %valB = load i8, ptr %inB, align 1
;
; SSE42-LABEL: 'test'
-; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1
-; SSE42: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i8, i8* %inB, align 1
-; SSE42: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1
-; SSE42: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1
-; SSE42: LV: Found an estimated cost of 192 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1
+; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, ptr %inB, align 1
+; SSE42: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i8, ptr %inB, align 1
+; SSE42: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i8, ptr %inB, align 1
+; SSE42: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i8, ptr %inB, align 1
+; SSE42: LV: Found an estimated cost of 192 for VF 16 For instruction: %valB = load i8, ptr %inB, align 1
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX1: LV: Found an estimated cost of 385 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX1: LV: Found an estimated cost of 385 for VF 32 For instruction: %valB = load i8, ptr %inB, align 1
;
; AVX2-SLOWGATHER-LABEL: 'test'
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 8 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 16 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 32 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 65 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 8 for VF 4 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 16 for VF 8 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 32 for VF 16 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 65 for VF 32 For instruction: %valB = load i8, ptr %inB, align 1
;
; AVX2-FASTGATHER-LABEL: 'test'
-; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX2-FASTGATHER: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX2-FASTGATHER: LV: Found an estimated cost of 26 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX2-FASTGATHER: LV: Found an estimated cost of 52 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX2-FASTGATHER: LV: Found an estimated cost of 105 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1
+; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX2-FASTGATHER: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX2-FASTGATHER: LV: Found an estimated cost of 26 for VF 8 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX2-FASTGATHER: LV: Found an estimated cost of 52 for VF 16 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX2-FASTGATHER: LV: Found an estimated cost of 105 for VF 32 For instruction: %valB = load i8, ptr %inB, align 1
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX512: LV: Found an estimated cost of 54 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX512: LV: Found an estimated cost of 109 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1
-; AVX512: LV: Found an estimated cost of 219 for VF 64 For instruction: %valB = load i8, i8* %inB, align 1
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX512: LV: Found an estimated cost of 54 for VF 16 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX512: LV: Found an estimated cost of 109 for VF 32 For instruction: %valB = load i8, ptr %inB, align 1
+; AVX512: LV: Found an estimated cost of 219 for VF 64 For instruction: %valB = load i8, ptr %inB, align 1
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%valA.ext = sext i8 %valA to i64
- %inB = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %valA.ext
- %valB = load i8, i8* %inB
+ %inB = getelementptr inbounds [1024 x i8], ptr @B, i64 0, i64 %valA.ext
+ %valB = load i8, ptr %inB
- %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @C, i64 0, i64 %iv
- store i8 %valB, i8* %out
+ %out = getelementptr inbounds [1024 x i8], ptr @C, i64 0, i64 %iv
+ store i8 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
define void @test_geps() {
; THRU-LABEL: 'test_geps'
-; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* undef, i32 0
-; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* undef, i32 0
-; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i32, i32* undef, i32 0
-; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i64, i64* undef, i32 0
-; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = getelementptr inbounds float, float* undef, i32 0
-; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds double, double* undef, i32 0
-; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807
-; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855
+; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, ptr undef, i64 9223372036854775807
+; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %giant_gep1 = getelementptr inbounds i8, ptr undef, i128 295147905179352825855
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'test_geps'
-; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* undef, i32 0
-; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* undef, i32 0
-; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i32, i32* undef, i32 0
-; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i64, i64* undef, i32 0
-; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = getelementptr inbounds float, float* undef, i32 0
-; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds double, double* undef, i32 0
-; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807
-; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855
+; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, ptr undef, i64 9223372036854775807
+; LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %giant_gep1 = getelementptr inbounds i8, ptr undef, i128 295147905179352825855
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'test_geps'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* undef, i32 0
-; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* undef, i32 0
-; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i32, i32* undef, i32 0
-; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i64, i64* undef, i32 0
-; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = getelementptr inbounds float, float* undef, i32 0
-; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds double, double* undef, i32 0
-; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807
-; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855
+; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, ptr undef, i64 9223372036854775807
+; SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %giant_gep1 = getelementptr inbounds i8, ptr undef, i128 295147905179352825855
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'test_geps'
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* undef, i32 0
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* undef, i32 0
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i32, i32* undef, i32 0
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i64, i64* undef, i32 0
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = getelementptr inbounds float, float* undef, i32 0
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds double, double* undef, i32 0
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, ptr undef, i64 9223372036854775807
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %giant_gep1 = getelementptr inbounds i8, ptr undef, i128 295147905179352825855
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; Cost of scalar geps should be zero.
; We expect it to be folded into the instruction addressing mode.
- %a0 = getelementptr inbounds i8, i8* undef, i32 0
- %a1 = getelementptr inbounds i16, i16* undef, i32 0
- %a2 = getelementptr inbounds i32, i32* undef, i32 0
- %a3 = getelementptr inbounds i64, i64* undef, i32 0
- %a4 = getelementptr inbounds float, float* undef, i32 0
- %a5 = getelementptr inbounds double, double* undef, i32 0
; Vector geps should also have zero cost.
- %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
- %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
- %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
- %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
- %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
- %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
; Check that we handle outlandishly large GEPs properly. This is unlikely to
; be a valid pointer, but LLVM still generates GEPs like this sometimes in
; dead code.
; This GEP has index INT64_MAX, which is cost 1.
- %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807
+ %giant_gep0 = getelementptr inbounds i8, ptr undef, i64 9223372036854775807
; This GEP index wraps around to -1, which is cost 0.
- %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855
+ %giant_gep1 = getelementptr inbounds i8, ptr undef, i128 295147905179352825855
ret void
}
for.body: ; preds = %for.body.lr.ph, %for.body
%i.073 = phi i32 [ 0, %for.body.lr.ph ], [ %add46, %for.body ]
- %arrayidx = getelementptr inbounds [120 x half], [120 x half]* @src, i32 0, i32 %i.073
- %0 = load half, half* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [120 x half], ptr @src, i32 0, i32 %i.073
+ %0 = load half, ptr %arrayidx, align 4
%mul = fmul fast half %0, %k
- %arrayidx2 = getelementptr inbounds [120 x half], [120 x half]* @dst, i32 0, i32 %i.073
- %1 = load half, half* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds [120 x half], ptr @dst, i32 0, i32 %i.073
+ %1 = load half, ptr %arrayidx2, align 4
%add3 = fadd fast half %1, %mul
- store half %add3, half* %arrayidx2, align 4
+ store half %add3, ptr %arrayidx2, align 4
%add4 = or i32 %i.073, 1
- %arrayidx5 = getelementptr inbounds [120 x half], [120 x half]* @src, i32 0, i32 %add4
- %2 = load half, half* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds [120 x half], ptr @src, i32 0, i32 %add4
+ %2 = load half, ptr %arrayidx5, align 4
%mul6 = fmul fast half %2, %k
- %arrayidx8 = getelementptr inbounds [120 x half], [120 x half]* @dst, i32 0, i32 %add4
- %3 = load half, half* %arrayidx8, align 4
+ %arrayidx8 = getelementptr inbounds [120 x half], ptr @dst, i32 0, i32 %add4
+ %3 = load half, ptr %arrayidx8, align 4
%add9 = fadd fast half %3, %mul6
- store half %add9, half* %arrayidx8, align 4
+ store half %add9, ptr %arrayidx8, align 4
%add10 = or i32 %i.073, 2
- %arrayidx11 = getelementptr inbounds [120 x half], [120 x half]* @src, i32 0, i32 %add10
- %4 = load half, half* %arrayidx11, align 4
+ %arrayidx11 = getelementptr inbounds [120 x half], ptr @src, i32 0, i32 %add10
+ %4 = load half, ptr %arrayidx11, align 4
%mul12 = fmul fast half %4, %k
- %arrayidx14 = getelementptr inbounds [120 x half], [120 x half]* @dst, i32 0, i32 %add10
- %5 = load half, half* %arrayidx14, align 4
+ %arrayidx14 = getelementptr inbounds [120 x half], ptr @dst, i32 0, i32 %add10
+ %5 = load half, ptr %arrayidx14, align 4
%add15 = fadd fast half %5, %mul12
- store half %add15, half* %arrayidx14, align 4
+ store half %add15, ptr %arrayidx14, align 4
%add16 = or i32 %i.073, 3
- %arrayidx17 = getelementptr inbounds [120 x half], [120 x half]* @src, i32 0, i32 %add16
- %6 = load half, half* %arrayidx17, align 4
+ %arrayidx17 = getelementptr inbounds [120 x half], ptr @src, i32 0, i32 %add16
+ %6 = load half, ptr %arrayidx17, align 4
%mul18 = fmul fast half %6, %k
- %arrayidx20 = getelementptr inbounds [120 x half], [120 x half]* @dst, i32 0, i32 %add16
- %7 = load half, half* %arrayidx20, align 4
+ %arrayidx20 = getelementptr inbounds [120 x half], ptr @dst, i32 0, i32 %add16
+ %7 = load half, ptr %arrayidx20, align 4
%add21 = fadd fast half %7, %mul18
- store half %add21, half* %arrayidx20, align 4
+ store half %add21, ptr %arrayidx20, align 4
%add22 = or i32 %i.073, 4
- %arrayidx23 = getelementptr inbounds [120 x half], [120 x half]* @src, i32 0, i32 %add22
- %8 = load half, half* %arrayidx23, align 4
+ %arrayidx23 = getelementptr inbounds [120 x half], ptr @src, i32 0, i32 %add22
+ %8 = load half, ptr %arrayidx23, align 4
%mul24 = fmul fast half %8, %k
- %arrayidx26 = getelementptr inbounds [120 x half], [120 x half]* @dst, i32 0, i32 %add22
- %9 = load half, half* %arrayidx26, align 4
+ %arrayidx26 = getelementptr inbounds [120 x half], ptr @dst, i32 0, i32 %add22
+ %9 = load half, ptr %arrayidx26, align 4
%add27 = fadd fast half %9, %mul24
- store half %add27, half* %arrayidx26, align 4
+ store half %add27, ptr %arrayidx26, align 4
%add28 = or i32 %i.073, 5
- %arrayidx29 = getelementptr inbounds [120 x half], [120 x half]* @src, i32 0, i32 %add28
- %10 = load half, half* %arrayidx29, align 4
+ %arrayidx29 = getelementptr inbounds [120 x half], ptr @src, i32 0, i32 %add28
+ %10 = load half, ptr %arrayidx29, align 4
%mul30 = fmul fast half %10, %k
- %arrayidx32 = getelementptr inbounds [120 x half], [120 x half]* @dst, i32 0, i32 %add28
- %11 = load half, half* %arrayidx32, align 4
+ %arrayidx32 = getelementptr inbounds [120 x half], ptr @dst, i32 0, i32 %add28
+ %11 = load half, ptr %arrayidx32, align 4
%add33 = fadd fast half %11, %mul30
- store half %add33, half* %arrayidx32, align 4
+ store half %add33, ptr %arrayidx32, align 4
%add34 = or i32 %i.073, 6
- %arrayidx35 = getelementptr inbounds [120 x half], [120 x half]* @src, i32 0, i32 %add34
- %12 = load half, half* %arrayidx35, align 4
+ %arrayidx35 = getelementptr inbounds [120 x half], ptr @src, i32 0, i32 %add34
+ %12 = load half, ptr %arrayidx35, align 4
%mul36 = fmul fast half %12, %k
- %arrayidx38 = getelementptr inbounds [120 x half], [120 x half]* @dst, i32 0, i32 %add34
- %13 = load half, half* %arrayidx38, align 4
+ %arrayidx38 = getelementptr inbounds [120 x half], ptr @dst, i32 0, i32 %add34
+ %13 = load half, ptr %arrayidx38, align 4
%add39 = fadd fast half %13, %mul36
- store half %add39, half* %arrayidx38, align 4
+ store half %add39, ptr %arrayidx38, align 4
%add40 = or i32 %i.073, 7
- %arrayidx41 = getelementptr inbounds [120 x half], [120 x half]* @src, i32 0, i32 %add40
- %14 = load half, half* %arrayidx41, align 4
+ %arrayidx41 = getelementptr inbounds [120 x half], ptr @src, i32 0, i32 %add40
+ %14 = load half, ptr %arrayidx41, align 4
%mul42 = fmul fast half %14, %k
- %arrayidx44 = getelementptr inbounds [120 x half], [120 x half]* @dst, i32 0, i32 %add40
- %15 = load half, half* %arrayidx44, align 4
+ %arrayidx44 = getelementptr inbounds [120 x half], ptr @dst, i32 0, i32 %add40
+ %15 = load half, ptr %arrayidx44, align 4
%add45 = fadd fast half %15, %mul42
- store half %add45, half* %arrayidx44, align 4
+ store half %add45, ptr %arrayidx44, align 4
%add46 = add nuw nsw i32 %i.073, 8
%cmp = icmp slt i32 %add46, %width_
br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
for.body: ; preds = %for.body.lr.ph, %for.body
%i.028 = phi i32 [ 0, %for.body.lr.ph ], [ %add16, %for.body ]
- %arrayidx = getelementptr inbounds [120 x half], [120 x half]* @src, i32 0, i32 %i.028
- %0 = load half, half* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [120 x half], ptr @src, i32 0, i32 %i.028
+ %0 = load half, ptr %arrayidx, align 4
%mul = fmul fast half %0, %k
- %arrayidx2 = getelementptr inbounds [120 x half], [120 x half]* @dst, i32 0, i32 %i.028
- %1 = load half, half* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds [120 x half], ptr @dst, i32 0, i32 %i.028
+ %1 = load half, ptr %arrayidx2, align 4
%add3 = fadd fast half %1, %mul
- store half %add3, half* %arrayidx2, align 4
+ store half %add3, ptr %arrayidx2, align 4
%add4 = add nuw nsw i32 %i.028, 1
- %arrayidx5 = getelementptr inbounds [120 x half], [120 x half]* @src, i32 0, i32 %add4
- %2 = load half, half* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds [120 x half], ptr @src, i32 0, i32 %add4
+ %2 = load half, ptr %arrayidx5, align 4
%mul6 = fmul fast half %2, %k
- %arrayidx8 = getelementptr inbounds [120 x half], [120 x half]* @dst, i32 0, i32 %add4
- %3 = load half, half* %arrayidx8, align 4
+ %arrayidx8 = getelementptr inbounds [120 x half], ptr @dst, i32 0, i32 %add4
+ %3 = load half, ptr %arrayidx8, align 4
%add9 = fadd fast half %3, %mul6
- store half %add9, half* %arrayidx8, align 4
+ store half %add9, ptr %arrayidx8, align 4
%add10 = add nuw nsw i32 %i.028, 2
- %arrayidx11 = getelementptr inbounds [120 x half], [120 x half]* @src, i32 0, i32 %add10
- %4 = load half, half* %arrayidx11, align 4
+ %arrayidx11 = getelementptr inbounds [120 x half], ptr @src, i32 0, i32 %add10
+ %4 = load half, ptr %arrayidx11, align 4
%mul12 = fmul fast half %4, %k
- %arrayidx14 = getelementptr inbounds [120 x half], [120 x half]* @dst, i32 0, i32 %add10
- %5 = load half, half* %arrayidx14, align 4
+ %arrayidx14 = getelementptr inbounds [120 x half], ptr @dst, i32 0, i32 %add10
+ %5 = load half, ptr %arrayidx14, align 4
%add15 = fadd fast half %5, %mul12
- store half %add15, half* %arrayidx14, align 4
+ store half %add15, ptr %arrayidx14, align 4
%add16 = add nuw nsw i32 %i.028, 3
%cmp = icmp slt i32 %add16, %width_
br i1 %cmp, label %for.body, label %for.cond.cleanup
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 30 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 60 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 30 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 60 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 42 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 84 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 42 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 84 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 4 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 8 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 2 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 13 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 50 for VF 64 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 2 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 13 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 50 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
;
entry:
br label %for.body
%iv.0 = add nuw nsw i64 %iv, 0
- %in0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.0
+ %in0 = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %iv.0
- %v0 = load i32, i32* %in0
+ %v0 = load i32, ptr %in0
%reduce.add.0 = add i32 %v0, 0
%reduce.add.0.narrow = trunc i32 %reduce.add.0 to i8
- %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
- store i8 %reduce.add.0.narrow, i8* %out
+ %out = getelementptr inbounds [1024 x i8], ptr @B, i64 0, i64 %iv.0
+ store i8 %reduce.add.0.narrow, ptr %out
%iv.next = add nuw nsw i64 %iv, 2
%cmp = icmp ult i64 %iv.next, 1024
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 31 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 62 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 124 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 31 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 62 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 124 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 19 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 19 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 34 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 9 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 144 for VF 64 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 9 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 144 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
;
entry:
br label %for.body
%iv.0 = add nuw nsw i64 %iv, 0
%iv.1 = add nuw nsw i64 %iv, 1
- %in0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.0
- %in1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.1
+ %in0 = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %iv.0
+ %in1 = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %iv.1
- %v0 = load i32, i32* %in0
- %v1 = load i32, i32* %in1
+ %v0 = load i32, ptr %in0
+ %v1 = load i32, ptr %in1
%reduce.add.0 = add i32 %v0, %v1
%reduce.add.1 = add i32 %reduce.add.0, 0
%reduce.add.1.narrow = trunc i32 %reduce.add.1 to i8
- %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
- store i8 %reduce.add.1.narrow, i8* %out
+ %out = getelementptr inbounds [1024 x i8], ptr @B, i64 0, i64 %iv.0
+ store i8 %reduce.add.1.narrow, ptr %out
%iv.next = add nuw nsw i64 %iv, 3
%cmp = icmp ult i64 %iv.next, 1024
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 34 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 68 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 34 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 68 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 46 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 92 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 46 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 92 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 11 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 23 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 11 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 23 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 3 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 21 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 78 for VF 64 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 3 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 21 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 78 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
;
entry:
br label %for.body
%iv.0 = add nuw nsw i64 %iv, 0
- %in0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.0
+ %in0 = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %iv.0
- %v0 = load i32, i32* %in0
+ %v0 = load i32, ptr %in0
%reduce.add.0 = add i32 %v0, 0
%reduce.add.1 = add i32 %reduce.add.0, 0
%reduce.add.1.narrow = trunc i32 %reduce.add.1 to i8
- %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
- store i8 %reduce.add.1.narrow, i8* %out
+ %out = getelementptr inbounds [1024 x i8], ptr @B, i64 0, i64 %iv.0
+ store i8 %reduce.add.1.narrow, ptr %out
%iv.next = add nuw nsw i64 %iv, 3
%cmp = icmp ult i64 %iv.next, 1024
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 90 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 90 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 59 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 118 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 236 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 59 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 118 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 236 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 67 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 67 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 17 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 71 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 17 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 71 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
;
entry:
br label %for.body
%iv.1 = add nuw nsw i64 %iv, 1
%iv.2 = add nuw nsw i64 %iv, 2
- %in0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.0
- %in1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.1
- %in2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.2
+ %in0 = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %iv.0
+ %in1 = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %iv.1
+ %in2 = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %iv.2
- %v0 = load i32, i32* %in0
- %v1 = load i32, i32* %in1
- %v2 = load i32, i32* %in2
+ %v0 = load i32, ptr %in0
+ %v1 = load i32, ptr %in1
+ %v2 = load i32, ptr %in2
%reduce.add.0 = add i32 %v0, %v1
%reduce.add.1 = add i32 %reduce.add.0, %v2
%reduce.add.2.narrow = trunc i32 %reduce.add.2 to i8
- %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
- store i8 %reduce.add.2.narrow, i8* %out
+ %out = getelementptr inbounds [1024 x i8], ptr @B, i64 0, i64 %iv.0
+ store i8 %reduce.add.2.narrow, ptr %out
%iv.next = add nuw nsw i64 %iv, 4
%cmp = icmp ult i64 %iv.next, 1024
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 30 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 60 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 30 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 60 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 20 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 84 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 168 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 20 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 84 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 168 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 50 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 50 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 13 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 50 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 160 for VF 64 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 13 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 50 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 160 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
;
entry:
br label %for.body
%iv.0 = add nuw nsw i64 %iv, 0
%iv.1 = add nuw nsw i64 %iv, 1
- %in0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.0
- %in1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.1
+ %in0 = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %iv.0
+ %in1 = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %iv.1
- %v0 = load i32, i32* %in0
- %v1 = load i32, i32* %in1
+ %v0 = load i32, ptr %in0
+ %v1 = load i32, ptr %in1
%reduce.add.0 = add i32 %v0, %v1
%reduce.add.1 = add i32 %reduce.add.0, 0
%reduce.add.2.narrow = trunc i32 %reduce.add.2 to i8
- %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
- store i8 %reduce.add.2.narrow, i8* %out
+ %out = getelementptr inbounds [1024 x i8], ptr @B, i64 0, i64 %iv.0
+ store i8 %reduce.add.2.narrow, ptr %out
%iv.next = add nuw nsw i64 %iv, 4
%cmp = icmp ult i64 %iv.next, 1024
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 15 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 30 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; SSE2: LV: Found an estimated cost of 60 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 15 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 30 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; SSE2: LV: Found an estimated cost of 60 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 25 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 50 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX1: LV: Found an estimated cost of 100 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 25 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 50 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX1: LV: Found an estimated cost of 100 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX2: LV: Found an estimated cost of 33 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 33 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 29 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4
-; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, i32* %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 29 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
;
entry:
br label %for.body
%iv.0 = add nuw nsw i64 %iv, 0
- %in0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %iv.0
+ %in0 = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %iv.0
- %v0 = load i32, i32* %in0
+ %v0 = load i32, ptr %in0
%reduce.add.0 = add i32 %v0, 0
%reduce.add.1 = add i32 %reduce.add.0, 0
%reduce.add.2.narrow = trunc i32 %reduce.add.2 to i8
- %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
- store i8 %reduce.add.2.narrow, i8* %out
+ %out = getelementptr inbounds [1024 x i8], ptr @B, i64 0, i64 %iv.0
+ store i8 %reduce.add.2.narrow, ptr %out
%iv.next = add nuw nsw i64 %iv, 4
%cmp = icmp ult i64 %iv.next, 1024
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
-declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
-declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>)
+declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>)
declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
declare float @llvm.vector.reduce.fmul.v16f32(float, <16 x float>)
declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)
define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
; THRU-LABEL: 'umul'
ret void
}
-define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) {
+define void @maskedgather(<16 x ptr> %va, <16 x i1> %vb, <16 x float> %vc) {
; THRU-LABEL: 'maskedgather'
-; THRU-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+; THRU-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'maskedgather'
-; LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+; LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'maskedgather'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'maskedgather'
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+ %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
ret void
}
-define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) {
+define void @maskedscatter(<16 x float> %va, <16 x ptr> %vb, <16 x i1> %vc) {
; THRU-LABEL: 'maskedscatter'
-; THRU-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
+; THRU-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'maskedscatter'
-; LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
+; LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'maskedscatter'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'maskedscatter'
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc)
+ call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
ret void
}
ret void
}
-define void @memcpy(i8* %a, i8* %b, i32 %c) {
+define void @memcpy(ptr %a, ptr %b, i32 %c) {
; THRU-LABEL: 'memcpy'
-; THRU-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
+; THRU-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'memcpy'
-; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
+; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'memcpy'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'memcpy'
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 %b, i32 32, i1 false)
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
ret void
}
; If SSE4.1 roundps instruction is available it is cheap to lower, otherwise
; it'll be scalarized into calls which are expensive.
-define void @test1(float* nocapture %f) nounwind {
+define void @test1(ptr nocapture %f) nounwind {
vector.ph:
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds float, float* %f, i64 %index
- %1 = bitcast float* %0 to <4 x float>*
- %wide.load = load <4 x float>, <4 x float>* %1, align 4
- %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
- store <4 x float> %2, <4 x float>* %1, align 4
+ %0 = getelementptr inbounds float, ptr %f, i64 %index
+ %wide.load = load <4 x float>, ptr %0, align 4
+ %1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
+ store <4 x float> %1, ptr %0, align 4
%index.next = add i64 %index, 4
- %3 = icmp eq i64 %index.next, 1024
- br i1 %3, label %for.end, label %vector.body
+ %2 = icmp eq i64 %index.next, 1024
+ br i1 %2, label %for.end, label %vector.body
for.end: ; preds = %vector.body
ret void
; CORE2: function 'test1'
-; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
+; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
; COREI7: function 'test1'
-; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
+; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
}
declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
-define void @test2(float* nocapture %f) nounwind {
+define void @test2(ptr nocapture %f) nounwind {
vector.ph:
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds float, float* %f, i64 %index
- %1 = bitcast float* %0 to <4 x float>*
- %wide.load = load <4 x float>, <4 x float>* %1, align 4
- %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
- store <4 x float> %2, <4 x float>* %1, align 4
+ %0 = getelementptr inbounds float, ptr %f, i64 %index
+ %wide.load = load <4 x float>, ptr %0, align 4
+ %1 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
+ store <4 x float> %1, ptr %0, align 4
%index.next = add i64 %index, 4
- %3 = icmp eq i64 %index.next, 1024
- br i1 %3, label %for.end, label %vector.body
+ %2 = icmp eq i64 %index.next, 1024
+ br i1 %2, label %for.end, label %vector.body
for.end: ; preds = %vector.body
ret void
; CORE2: function 'test2'
-; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
+; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %1 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
; COREI7: function 'test2'
-; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
+; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
}
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
-define void @test3(float* nocapture %f, <4 x float> %b, <4 x float> %c) nounwind {
+define void @test3(ptr nocapture %f, <4 x float> %b, <4 x float> %c) nounwind {
vector.ph:
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds float, float* %f, i64 %index
- %1 = bitcast float* %0 to <4 x float>*
- %wide.load = load <4 x float>, <4 x float>* %1, align 4
- %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
- store <4 x float> %2, <4 x float>* %1, align 4
+ %0 = getelementptr inbounds float, ptr %f, i64 %index
+ %wide.load = load <4 x float>, ptr %0, align 4
+ %1 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
+ store <4 x float> %1, ptr %0, align 4
%index.next = add i64 %index, 4
- %3 = icmp eq i64 %index.next, 1024
- br i1 %3, label %for.end, label %vector.body
+ %2 = icmp eq i64 %index.next, 1024
+ br i1 %2, label %for.end, label %vector.body
for.end: ; preds = %vector.body
ret void
; CORE2: function 'test3'
-; CORE2: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
+; CORE2: Cost Model: Found an estimated cost of 4 for instruction: %1 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
; COREI7: function 'test3'
-; COREI7: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
+; COREI7: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
}
declare i64 @llvm.bswap.i64(i64)
declare i128 @llvm.bswap.i128(i128)
-define i16 @var_load_bswap_i16(i16* %src) {
+define i16 @var_load_bswap_i16(ptr %src) {
; X64-LABEL: 'var_load_bswap_i16'
-; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %src, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
;
; X64-MOVBE-LABEL: 'var_load_bswap_i16'
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %src, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
;
; X64-FASTMOVBE-LABEL: 'var_load_bswap_i16'
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %src, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
;
; X32-LABEL: 'var_load_bswap_i16'
-; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %src, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
;
; X32-MOVBE-LABEL: 'var_load_bswap_i16'
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %src, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
;
; X32-FASTMOVBE-LABEL: 'var_load_bswap_i16'
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %src, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
;
- %a = load i16, i16* %src, align 1
+ %a = load i16, ptr %src, align 1
%bswap = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %bswap
}
-define i16 @var_load_bswap_i16_extrause(i16* %src, i16* %clobberdst) {
+define i16 @var_load_bswap_i16_extrause(ptr %src, ptr %clobberdst) {
; ALL-LABEL: 'var_load_bswap_i16_extrause'
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, ptr %src, align 1
; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a2 = shl i16 %a, 2
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %a2, i16* %clobberdst, align 1
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %a2, ptr %clobberdst, align 1
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
;
- %a = load i16, i16* %src, align 1
+ %a = load i16, ptr %src, align 1
%bswap = call i16 @llvm.bswap.i16(i16 %a)
%a2 = shl i16 %a, 2 ; incur an extra use to the load
- store i16 %a2, i16* %clobberdst, align 1
+ store i16 %a2, ptr %clobberdst, align 1
ret i16 %bswap
}
-define i32 @var_load_bswap_i32(i32* %src) {
+define i32 @var_load_bswap_i32(ptr %src) {
; X64-LABEL: 'var_load_bswap_i32'
-; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, ptr %src, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
;
; X64-MOVBE-LABEL: 'var_load_bswap_i32'
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, ptr %src, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
;
; X64-FASTMOVBE-LABEL: 'var_load_bswap_i32'
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, ptr %src, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
;
; X32-LABEL: 'var_load_bswap_i32'
-; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, ptr %src, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
;
; X32-MOVBE-LABEL: 'var_load_bswap_i32'
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, ptr %src, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
;
; X32-FASTMOVBE-LABEL: 'var_load_bswap_i32'
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, ptr %src, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
;
- %a = load i32, i32* %src, align 1
+ %a = load i32, ptr %src, align 1
%bswap = call i32 @llvm.bswap.i32(i32 %a)
ret i32 %bswap
}
-define i32 @var_load_bswap_i32_extrause(i32* %src, i32* %clobberdst) {
+define i32 @var_load_bswap_i32_extrause(ptr %src, ptr %clobberdst) {
; ALL-LABEL: 'var_load_bswap_i32_extrause'
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, ptr %src, align 1
; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a2 = shl i32 %a, 2
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %a2, i32* %clobberdst, align 1
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %a2, ptr %clobberdst, align 1
; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
;
- %a = load i32, i32* %src, align 1
+ %a = load i32, ptr %src, align 1
%bswap = call i32 @llvm.bswap.i32(i32 %a)
%a2 = shl i32 %a, 2 ; incur an extra use to the load
- store i32 %a2, i32* %clobberdst, align 1
+ store i32 %a2, ptr %clobberdst, align 1
ret i32 %bswap
}
-define i64 @var_load_bswap_i64(i64* %src) {
+define i64 @var_load_bswap_i64(ptr %src) {
; X64-LABEL: 'var_load_bswap_i64'
-; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, i64* %src, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, ptr %src, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
; X64-MOVBE-LABEL: 'var_load_bswap_i64'
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, i64* %src, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, ptr %src, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
; X64-FASTMOVBE-LABEL: 'var_load_bswap_i64'
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, i64* %src, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, ptr %src, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
; X32-LABEL: 'var_load_bswap_i64'
-; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, ptr %src, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
; X32-MOVBE-LABEL: 'var_load_bswap_i64'
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, ptr %src, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
; X32-FASTMOVBE-LABEL: 'var_load_bswap_i64'
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, ptr %src, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
- %a = load i64, i64* %src, align 1
+ %a = load i64, ptr %src, align 1
%bswap = call i64 @llvm.bswap.i64(i64 %a)
ret i64 %bswap
}
-define i64 @var_load_bswap_i64_extrause(i64* %src, i64* %clobberdst) {
+define i64 @var_load_bswap_i64_extrause(ptr %src, ptr %clobberdst) {
; X64-LABEL: 'var_load_bswap_i64_extrause'
-; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, i64* %src, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, ptr %src, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a2 = shl i64 %a, 2
-; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %a2, i64* %clobberdst, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %a2, ptr %clobberdst, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
; X64-MOVBE-LABEL: 'var_load_bswap_i64_extrause'
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, i64* %src, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, ptr %src, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a2 = shl i64 %a, 2
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %a2, i64* %clobberdst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %a2, ptr %clobberdst, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
; X64-FASTMOVBE-LABEL: 'var_load_bswap_i64_extrause'
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, i64* %src, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, ptr %src, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a2 = shl i64 %a, 2
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %a2, i64* %clobberdst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %a2, ptr %clobberdst, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
; X32-LABEL: 'var_load_bswap_i64_extrause'
-; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, ptr %src, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = shl i64 %a, 2
-; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %a2, i64* %clobberdst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %a2, ptr %clobberdst, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
; X32-MOVBE-LABEL: 'var_load_bswap_i64_extrause'
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, ptr %src, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = shl i64 %a, 2
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %a2, i64* %clobberdst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %a2, ptr %clobberdst, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
; X32-FASTMOVBE-LABEL: 'var_load_bswap_i64_extrause'
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, ptr %src, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = shl i64 %a, 2
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %a2, i64* %clobberdst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %a2, ptr %clobberdst, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
- %a = load i64, i64* %src, align 1
+ %a = load i64, ptr %src, align 1
%bswap = call i64 @llvm.bswap.i64(i64 %a)
%a2 = shl i64 %a, 2 ; incur an extra use to the load
- store i64 %a2, i64* %clobberdst, align 1
+ store i64 %a2, ptr %clobberdst, align 1
ret i64 %bswap
}
-define i128 @var_load_bswap_i128(i128* %src) {
+define i128 @var_load_bswap_i128(ptr %src) {
; X64-LABEL: 'var_load_bswap_i128'
-; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, i128* %src, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, ptr %src, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
; X64-MOVBE-LABEL: 'var_load_bswap_i128'
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, i128* %src, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, ptr %src, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
; X64-FASTMOVBE-LABEL: 'var_load_bswap_i128'
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, i128* %src, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, ptr %src, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
; X32-LABEL: 'var_load_bswap_i128'
-; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, ptr %src, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
; X32-MOVBE-LABEL: 'var_load_bswap_i128'
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, ptr %src, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
; X32-FASTMOVBE-LABEL: 'var_load_bswap_i128'
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, ptr %src, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
- %a = load i128, i128* %src, align 1
+ %a = load i128, ptr %src, align 1
%bswap = call i128 @llvm.bswap.i128(i128 %a)
ret i128 %bswap
}
; var_load_bswap_i128_extrause: loads an i128 from %src, byte-swaps it with
; llvm.bswap.i128 and returns the swapped value. The loaded value is also
; shifted left by 2 and stored to %clobberdst, so the load has a second user
; besides the bswap.
; Expected costs recorded below: 2 for the load, bswap, shl and store under the
; three X64 prefixes, 4 for each of them under the three X32 prefixes, and 0 for
; the ret everywhere. The MOVBE and FASTMOVBE variants expect the same costs as
; their base prefix.
; NOTE(review): the RUN lines that define these prefixes are not visible in
; this part of the file - confirm the cost kind and target features there.
-define i128 @var_load_bswap_i128_extrause(i128* %src, i128* %clobberdst) {
+define i128 @var_load_bswap_i128_extrause(ptr %src, ptr %clobberdst) {
; X64-LABEL: 'var_load_bswap_i128_extrause'
-; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, i128* %src, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, ptr %src, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = shl i128 %a, 2
-; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %a2, i128* %clobberdst, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %a2, ptr %clobberdst, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
; X64-MOVBE-LABEL: 'var_load_bswap_i128_extrause'
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, i128* %src, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, ptr %src, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = shl i128 %a, 2
-; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %a2, i128* %clobberdst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %a2, ptr %clobberdst, align 1
; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
; X64-FASTMOVBE-LABEL: 'var_load_bswap_i128_extrause'
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, i128* %src, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, ptr %src, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = shl i128 %a, 2
-; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %a2, i128* %clobberdst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %a2, ptr %clobberdst, align 1
; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
; X32-LABEL: 'var_load_bswap_i128_extrause'
-; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, ptr %src, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2 = shl i128 %a, 2
-; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %a2, i128* %clobberdst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %a2, ptr %clobberdst, align 1
; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
; X32-MOVBE-LABEL: 'var_load_bswap_i128_extrause'
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, ptr %src, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2 = shl i128 %a, 2
-; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %a2, i128* %clobberdst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %a2, ptr %clobberdst, align 1
; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
; X32-FASTMOVBE-LABEL: 'var_load_bswap_i128_extrause'
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, ptr %src, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2 = shl i128 %a, 2
-; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %a2, i128* %clobberdst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %a2, ptr %clobberdst, align 1
; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
- %a = load i128, i128* %src, align 1
+ %a = load i128, ptr %src, align 1
%bswap = call i128 @llvm.bswap.i128(i128 %a)
%a2 = shl i128 %a, 2 ; incur an extra use to the load
- store i128 %a2, i128* %clobberdst, align 1
+ store i128 %a2, ptr %clobberdst, align 1
ret i128 %bswap
}
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=x86_64--linux-gnu < %s | FileCheck %s --check-prefix=CHECK
; Check that the cost is 1 for an unusual-width load that is truncated to a register-sized load.
; loadUnusualIntegerWithTrunc: loads an i128 through %ptr (no explicit
; alignment on the load; the expected output prints it as align 4) and returns
; the value truncated to i32.
; Expected costs: 1 for the load, 0 for the trunc and 1 for the ret.
-define i32 @loadUnusualIntegerWithTrunc(i128* %ptr) {
+define i32 @loadUnusualIntegerWithTrunc(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualIntegerWithTrunc'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, i128* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i128 %out to i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %trunc
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
%trunc = trunc i128 %out to i32
ret i32 %trunc
}
; loadUnusualInteger: loads an i128 through %ptr (no explicit alignment; the
; expected output prints align 4) and returns it without truncation.
; Expected costs: 1 for the load and 1 for the ret.
-define i128 @loadUnusualInteger(i128* %ptr) {
+define i128 @loadUnusualInteger(ptr %ptr) {
; CHECK-LABEL: 'loadUnusualInteger'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, i128* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %out = load i128, ptr %ptr, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i128 %out
;
- %out = load i128, i128* %ptr
+ %out = load i128, ptr %ptr
ret i128 %out
}
define i32 @stores_align4(i32 %arg) {
; Scalars
; SSE2-LABEL: 'stores_align4'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, i1* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, <128 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, <16 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, <8 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, <4 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, <2 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, <1 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, <256 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, <32 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <512 x i1> undef, <512 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, <8 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, <64 x i1>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ptr undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x ptr> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <512 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x ptr> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'stores_align4'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, i1* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, <128 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, <16 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, <8 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, <4 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, <2 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, <1 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, <256 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, <32 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <512 x i1> undef, <512 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, <8 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, <64 x i1>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ptr undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x ptr> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <512 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x ptr> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'stores_align4'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, i1* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, <128 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, <16 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, <8 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, <4 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, <2 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, <1 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <256 x i1> undef, <256 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, <32 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <512 x i1> undef, <512 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x float> undef, <16 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x double> undef, <8 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, <64 x i1>* undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ptr undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <256 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x ptr> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <512 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <64 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i64> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x double> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x ptr> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, ptr undef, align 4
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'stores_align4'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, i1* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <128 x i1> undef, <128 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, <16 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, <8 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, <4 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, <2 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, <1 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <256 x i1> undef, <256 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, <32 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <512 x i1> undef, <512 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x float> undef, <16 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x double> undef, <8 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, <64 x i1>* undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ptr undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <128 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <256 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x ptr> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <512 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i64> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x double> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x ptr> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, ptr undef, align 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- store i8 undef, i8* undef, align 4
- store i16 undef, i16* undef, align 4
- store i32 undef, i32* undef, align 4
- store i64 undef, i64* undef, align 4
- store i128 undef, i128* undef, align 4
- store i256 undef, i256* undef, align 4
- store i512 undef, i512* undef, align 4
+ store i8 undef, ptr undef, align 4
+ store i16 undef, ptr undef, align 4
+ store i32 undef, ptr undef, align 4
+ store i64 undef, ptr undef, align 4
+ store i128 undef, ptr undef, align 4
+ store i256 undef, ptr undef, align 4
+ store i512 undef, ptr undef, align 4
- store float undef, float* undef, align 4
- store double undef, double* undef, align 4
+ store float undef, ptr undef, align 4
+ store double undef, ptr undef, align 4
- store i8* undef, i8** undef, align 4
+ store ptr undef, ptr undef, align 4
- store i1 undef, i1* undef, align 4
+ store i1 undef, ptr undef, align 4
; XMM (128-bit) vectors
- store <128 x i1> undef, <128 x i1>* undef, align 4
- store <16 x i8> undef, <16 x i8>* undef, align 4
- store <8 x i16> undef, <8 x i16>* undef, align 4
- store <4 x i32> undef, <4 x i32>* undef, align 4
- store <2 x i64> undef, <2 x i64>* undef, align 4
- store <1 x i128> undef, <1 x i128>* undef, align 4
+ store <128 x i1> undef, ptr undef, align 4
+ store <16 x i8> undef, ptr undef, align 4
+ store <8 x i16> undef, ptr undef, align 4
+ store <4 x i32> undef, ptr undef, align 4
+ store <2 x i64> undef, ptr undef, align 4
+ store <1 x i128> undef, ptr undef, align 4
- store <4 x float> undef, <4 x float>* undef, align 4
- store <2 x double> undef, <2 x double>* undef, align 4
+ store <4 x float> undef, ptr undef, align 4
+ store <2 x double> undef, ptr undef, align 4
- store <2 x i8*> undef, <2 x i8*>* undef, align 4
+ store <2 x ptr> undef, ptr undef, align 4
- store <16 x i1> undef, <16 x i1>* undef, align 4
- store <8 x i1> undef, <8 x i1>* undef, align 4
- store <4 x i1> undef, <4 x i1>* undef, align 4
- store <2 x i1> undef, <2 x i1>* undef, align 4
- store <1 x i1> undef, <1 x i1>* undef, align 4
+ store <16 x i1> undef, ptr undef, align 4
+ store <8 x i1> undef, ptr undef, align 4
+ store <4 x i1> undef, ptr undef, align 4
+ store <2 x i1> undef, ptr undef, align 4
+ store <1 x i1> undef, ptr undef, align 4
; YMM (256-bit) vectors
- store <256 x i1> undef, <256 x i1>* undef, align 4
- store <32 x i8> undef, <32 x i8>* undef, align 4
- store <16 x i16> undef, <16 x i16>* undef, align 4
- store <8 x i32> undef, <8 x i32>* undef, align 4
- store <4 x i64> undef, <4 x i64>* undef, align 4
- store <2 x i128> undef, <2 x i128>* undef, align 4
- store <1 x i256> undef, <1 x i256>* undef, align 4
+ store <256 x i1> undef, ptr undef, align 4
+ store <32 x i8> undef, ptr undef, align 4
+ store <16 x i16> undef, ptr undef, align 4
+ store <8 x i32> undef, ptr undef, align 4
+ store <4 x i64> undef, ptr undef, align 4
+ store <2 x i128> undef, ptr undef, align 4
+ store <1 x i256> undef, ptr undef, align 4
- store <8 x float> undef, <8 x float>* undef, align 4
- store <4 x double> undef, <4 x double>* undef, align 4
+ store <8 x float> undef, ptr undef, align 4
+ store <4 x double> undef, ptr undef, align 4
- store <4 x i8*> undef, <4 x i8*>* undef, align 4
+ store <4 x ptr> undef, ptr undef, align 4
- store <32 x i1> undef, <32 x i1>* undef, align 4
+ store <32 x i1> undef, ptr undef, align 4
; ZMM (512-bit) vectors
- store <512 x i1> undef, <512 x i1>* undef, align 4
- store <64 x i8> undef, <64 x i8>* undef, align 4
- store <32 x i16> undef, <32 x i16>* undef, align 4
- store <16 x i32> undef, <16 x i32>* undef, align 4
- store <8 x i64> undef, <8 x i64>* undef, align 4
- store <4 x i128> undef, <4 x i128>* undef, align 4
- store <2 x i256> undef, <2 x i256>* undef, align 4
- store <1 x i512> undef, <1 x i512>* undef, align 4
+ store <512 x i1> undef, ptr undef, align 4
+ store <64 x i8> undef, ptr undef, align 4
+ store <32 x i16> undef, ptr undef, align 4
+ store <16 x i32> undef, ptr undef, align 4
+ store <8 x i64> undef, ptr undef, align 4
+ store <4 x i128> undef, ptr undef, align 4
+ store <2 x i256> undef, ptr undef, align 4
+ store <1 x i512> undef, ptr undef, align 4
- store <16 x float> undef, <16 x float>* undef, align 4
- store <8 x double> undef, <8 x double>* undef, align 4
+ store <16 x float> undef, ptr undef, align 4
+ store <8 x double> undef, ptr undef, align 4
- store <8 x i8*> undef, <8 x i8*>* undef, align 4
+ store <8 x ptr> undef, ptr undef, align 4
- store <64 x i1> undef, <64 x i1>* undef, align 4
+ store <64 x i1> undef, ptr undef, align 4
ret i32 undef
}
define i32 @stores_partial_align4(i32 %arg) {
; Partial vectors with i64 elements (doubles as pointer-sized tests))
; SSE2-LABEL: 'stores_partial_align4'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, <5 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, <7 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, <3 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, <5 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, <6 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, <7 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i1> undef, <9 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i1> undef, <10 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i1> undef, <11 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i1> undef, <12 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i1> undef, <13 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i1> undef, <14 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <15 x i1> undef, <15 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, <17 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, <18 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, <19 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, <20 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, <21 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, <22 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, <23 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, <24 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, <25 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, <26 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, <27 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, <28 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, <29 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, <30 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, <31 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, <33 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, <34 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, <35 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, <36 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, <37 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, <38 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, <39 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, <40 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, <41 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, <42 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, <43 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, <44 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, <45 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, <46 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, <47 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, <48 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, <49 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, <50 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, <51 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, <52 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, <53 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, <54 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, <55 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, <56 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, <57 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, <58 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, <59 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, <60 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, <61 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, <62 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, <63 x i1>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <13 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <31 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <43 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <44 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <45 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <61 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <15 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'stores_partial_align4'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, <5 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, <7 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, <3 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, <5 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, <6 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, <7 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, <9 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, <10 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, <11 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, <12 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, <13 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, <14 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, <15 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, <17 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, <18 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, <19 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, <20 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, <21 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, <22 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, <23 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, <24 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i1> undef, <25 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i1> undef, <26 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <27 x i1> undef, <27 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i1> undef, <28 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <29 x i1> undef, <29 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <30 x i1> undef, <30 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <31 x i1> undef, <31 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, <33 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, <34 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, <35 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, <36 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, <37 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, <38 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, <39 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, <40 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, <41 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, <42 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, <43 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, <44 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, <45 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, <46 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, <47 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, <48 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, <49 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, <50 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, <51 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, <52 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, <53 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, <54 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, <55 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, <56 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <57 x i1> undef, <57 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <58 x i1> undef, <58 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <59 x i1> undef, <59 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <60 x i1> undef, <60 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <61 x i1> undef, <61 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <62 x i1> undef, <62 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <63 x i1> undef, <63 x i1>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <19 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <21 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <39 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <55 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <27 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <29 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <30 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <31 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <57 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <58 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <59 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <60 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <61 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <62 x i1> undef, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <63 x i1> undef, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'stores_partial_align4'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x double> undef, <5 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x double> undef, <6 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, <5 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x float> undef, <6 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, <7 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x float> undef, <9 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x float> undef, <10 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x float> undef, <11 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x float> undef, <12 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, <3 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, <5 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, <6 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, <7 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, <9 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, <10 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, <11 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, <12 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, <13 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, <14 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, <15 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, <17 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, <18 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, <19 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, <20 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, <21 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, <22 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, <23 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, <24 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, <25 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, <26 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, <27 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, <28 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, <29 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, <30 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, <31 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, <33 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, <34 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, <35 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, <36 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, <37 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, <38 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, <39 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, <40 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, <41 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, <42 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, <43 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, <44 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, <45 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, <46 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, <47 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, <48 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i1> undef, <49 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i1> undef, <50 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <51 x i1> undef, <51 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i1> undef, <52 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <53 x i1> undef, <53 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <54 x i1> undef, <54 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <55 x i1> undef, <55 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i1> undef, <56 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, <57 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, <58 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, <59 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, <60 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, <61 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, <62 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, <63 x i1>* undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i64> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i64> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x double> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x double> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <19 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <21 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <35 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <37 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <46 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <52 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <51 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <53 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <54 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <55 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, ptr undef, align 4
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'stores_partial_align4'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x double> undef, <7 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, <5 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x float> undef, <6 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, <7 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x float> undef, <13 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x float> undef, <14 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x float> undef, <15 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, <3 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, <5 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, <6 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, <7 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, <9 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, <10 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, <11 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, <12 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, <13 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, <14 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, <15 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, <17 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, <18 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, <19 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, <20 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, <21 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, <22 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, <23 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, <24 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, <25 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, <26 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, <27 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, <28 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, <29 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, <30 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, <31 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, <33 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, <34 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, <35 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, <36 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, <37 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, <38 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, <39 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, <40 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, <41 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, <42 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, <43 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, <44 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, <45 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, <46 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, <47 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, <48 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, <49 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, <50 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, <51 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, <52 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, <53 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, <54 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, <55 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, <56 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i1> undef, <57 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <58 x i1> undef, <58 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <59 x i1> undef, <59 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i1> undef, <60 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <61 x i1> undef, <61 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <62 x i1> undef, <62 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <63 x i1> undef, <63 x i1>* undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x double> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <13 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x float> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <26 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <27 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <36 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <49 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <50 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <51 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <52 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <54 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <55 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <56 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <63 x i8> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <58 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <59 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <61 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <62 x i1> undef, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <63 x i1> undef, ptr undef, align 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- store <1 x i64> undef, <1 x i64>* undef, align 4
+ store <1 x i64> undef, ptr undef, align 4
; <2 x i64> is XMM
- store <3 x i64> undef, <3 x i64>* undef, align 4
+ store <3 x i64> undef, ptr undef, align 4
; <4 x i64> is YMM
- store <5 x i64> undef, <5 x i64>* undef, align 4
- store <6 x i64> undef, <6 x i64>* undef, align 4
- store <7 x i64> undef, <7 x i64>* undef, align 4
+ store <5 x i64> undef, ptr undef, align 4
+ store <6 x i64> undef, ptr undef, align 4
+ store <7 x i64> undef, ptr undef, align 4
; <8 x i64> is ZMM
; Partial vectors with double elements
- store <1 x double> undef, <1 x double>* undef, align 4
+ store <1 x double> undef, ptr undef, align 4
; <2 x double> is XMM
- store <3 x double> undef, <3 x double>* undef, align 4
+ store <3 x double> undef, ptr undef, align 4
; <4 x double> is YMM
- store <5 x double> undef, <5 x double>* undef, align 4
- store <6 x double> undef, <6 x double>* undef, align 4
- store <7 x double> undef, <7 x double>* undef, align 4
+ store <5 x double> undef, ptr undef, align 4
+ store <6 x double> undef, ptr undef, align 4
+ store <7 x double> undef, ptr undef, align 4
; <8 x double> is ZMM
; Partial vectors with i32 elements
- store <1 x i32> undef, <1 x i32>* undef, align 4
- store <2 x i32> undef, <2 x i32>* undef, align 4
- store <3 x i32> undef, <3 x i32>* undef, align 4
+ store <1 x i32> undef, ptr undef, align 4
+ store <2 x i32> undef, ptr undef, align 4
+ store <3 x i32> undef, ptr undef, align 4
; <4 x i32> is XMM
- store <5 x i32> undef, <5 x i32>* undef, align 4
- store <6 x i32> undef, <6 x i32>* undef, align 4
- store <7 x i32> undef, <7 x i32>* undef, align 4
+ store <5 x i32> undef, ptr undef, align 4
+ store <6 x i32> undef, ptr undef, align 4
+ store <7 x i32> undef, ptr undef, align 4
; <8 x i32> is YMM
- store <9 x i32> undef, <9 x i32>* undef, align 4
- store <10 x i32> undef, <10 x i32>* undef, align 4
- store <11 x i32> undef, <11 x i32>* undef, align 4
- store <12 x i32> undef, <12 x i32>* undef, align 4
- store <13 x i32> undef, <13 x i32>* undef, align 4
- store <14 x i32> undef, <14 x i32>* undef, align 4
- store <15 x i32> undef, <15 x i32>* undef, align 4
+ store <9 x i32> undef, ptr undef, align 4
+ store <10 x i32> undef, ptr undef, align 4
+ store <11 x i32> undef, ptr undef, align 4
+ store <12 x i32> undef, ptr undef, align 4
+ store <13 x i32> undef, ptr undef, align 4
+ store <14 x i32> undef, ptr undef, align 4
+ store <15 x i32> undef, ptr undef, align 4
; <16 x i32> is ZMM
; Partial vectors with float elements
- store <1 x float> undef, <1 x float>* undef, align 4
- store <2 x float> undef, <2 x float>* undef, align 4
- store <3 x float> undef, <3 x float>* undef, align 4
+ store <1 x float> undef, ptr undef, align 4
+ store <2 x float> undef, ptr undef, align 4
+ store <3 x float> undef, ptr undef, align 4
; <4 x float> is XMM
- store <5 x float> undef, <5 x float>* undef, align 4
- store <6 x float> undef, <6 x float>* undef, align 4
- store <7 x float> undef, <7 x float>* undef, align 4
+ store <5 x float> undef, ptr undef, align 4
+ store <6 x float> undef, ptr undef, align 4
+ store <7 x float> undef, ptr undef, align 4
; <8 x float> is YMM
- store <9 x float> undef, <9 x float>* undef, align 4
- store <10 x float> undef, <10 x float>* undef, align 4
- store <11 x float> undef, <11 x float>* undef, align 4
- store <12 x float> undef, <12 x float>* undef, align 4
- store <13 x float> undef, <13 x float>* undef, align 4
- store <14 x float> undef, <14 x float>* undef, align 4
- store <15 x float> undef, <15 x float>* undef, align 4
+ store <9 x float> undef, ptr undef, align 4
+ store <10 x float> undef, ptr undef, align 4
+ store <11 x float> undef, ptr undef, align 4
+ store <12 x float> undef, ptr undef, align 4
+ store <13 x float> undef, ptr undef, align 4
+ store <14 x float> undef, ptr undef, align 4
+ store <15 x float> undef, ptr undef, align 4
; <16 x float> is ZMM
; Partial vectors with i16 elements
- store <1 x i16> undef, <1 x i16>* undef, align 4
- store <2 x i16> undef, <2 x i16>* undef, align 4
- store <3 x i16> undef, <3 x i16>* undef, align 4
- store <4 x i16> undef, <4 x i16>* undef, align 4
- store <5 x i16> undef, <5 x i16>* undef, align 4
- store <6 x i16> undef, <6 x i16>* undef, align 4
- store <7 x i16> undef, <7 x i16>* undef, align 4
+ store <1 x i16> undef, ptr undef, align 4
+ store <2 x i16> undef, ptr undef, align 4
+ store <3 x i16> undef, ptr undef, align 4
+ store <4 x i16> undef, ptr undef, align 4
+ store <5 x i16> undef, ptr undef, align 4
+ store <6 x i16> undef, ptr undef, align 4
+ store <7 x i16> undef, ptr undef, align 4
; <8 x i16> is XMM
- store <9 x i16> undef, <9 x i16>* undef, align 4
- store <10 x i16> undef, <10 x i16>* undef, align 4
- store <11 x i16> undef, <11 x i16>* undef, align 4
- store <12 x i16> undef, <12 x i16>* undef, align 4
- store <13 x i16> undef, <13 x i16>* undef, align 4
- store <14 x i16> undef, <14 x i16>* undef, align 4
- store <15 x i16> undef, <15 x i16>* undef, align 4
+ store <9 x i16> undef, ptr undef, align 4
+ store <10 x i16> undef, ptr undef, align 4
+ store <11 x i16> undef, ptr undef, align 4
+ store <12 x i16> undef, ptr undef, align 4
+ store <13 x i16> undef, ptr undef, align 4
+ store <14 x i16> undef, ptr undef, align 4
+ store <15 x i16> undef, ptr undef, align 4
; <16 x i16> is YMM
- store <17 x i16> undef, <17 x i16>* undef, align 4
- store <18 x i16> undef, <18 x i16>* undef, align 4
- store <19 x i16> undef, <19 x i16>* undef, align 4
- store <20 x i16> undef, <20 x i16>* undef, align 4
- store <21 x i16> undef, <21 x i16>* undef, align 4
- store <22 x i16> undef, <22 x i16>* undef, align 4
- store <23 x i16> undef, <23 x i16>* undef, align 4
- store <24 x i16> undef, <24 x i16>* undef, align 4
- store <25 x i16> undef, <25 x i16>* undef, align 4
- store <26 x i16> undef, <26 x i16>* undef, align 4
- store <27 x i16> undef, <27 x i16>* undef, align 4
- store <28 x i16> undef, <28 x i16>* undef, align 4
- store <29 x i16> undef, <29 x i16>* undef, align 4
- store <30 x i16> undef, <30 x i16>* undef, align 4
- store <31 x i16> undef, <31 x i16>* undef, align 4
+ store <17 x i16> undef, ptr undef, align 4
+ store <18 x i16> undef, ptr undef, align 4
+ store <19 x i16> undef, ptr undef, align 4
+ store <20 x i16> undef, ptr undef, align 4
+ store <21 x i16> undef, ptr undef, align 4
+ store <22 x i16> undef, ptr undef, align 4
+ store <23 x i16> undef, ptr undef, align 4
+ store <24 x i16> undef, ptr undef, align 4
+ store <25 x i16> undef, ptr undef, align 4
+ store <26 x i16> undef, ptr undef, align 4
+ store <27 x i16> undef, ptr undef, align 4
+ store <28 x i16> undef, ptr undef, align 4
+ store <29 x i16> undef, ptr undef, align 4
+ store <30 x i16> undef, ptr undef, align 4
+ store <31 x i16> undef, ptr undef, align 4
; <32 x i16> is ZMM
; Partial vectors with i8 elements
- store <1 x i8> undef, <1 x i8>* undef, align 4
- store <2 x i8> undef, <2 x i8>* undef, align 4
- store <3 x i8> undef, <3 x i8>* undef, align 4
- store <4 x i8> undef, <4 x i8>* undef, align 4
- store <5 x i8> undef, <5 x i8>* undef, align 4
- store <6 x i8> undef, <6 x i8>* undef, align 4
- store <7 x i8> undef, <7 x i8>* undef, align 4
- store <8 x i8> undef, <8 x i8>* undef, align 4
- store <9 x i8> undef, <9 x i8>* undef, align 4
- store <10 x i8> undef, <10 x i8>* undef, align 4
- store <11 x i8> undef, <11 x i8>* undef, align 4
- store <12 x i8> undef, <12 x i8>* undef, align 4
- store <13 x i8> undef, <13 x i8>* undef, align 4
- store <14 x i8> undef, <14 x i8>* undef, align 4
- store <15 x i8> undef, <15 x i8>* undef, align 4
+ store <1 x i8> undef, ptr undef, align 4
+ store <2 x i8> undef, ptr undef, align 4
+ store <3 x i8> undef, ptr undef, align 4
+ store <4 x i8> undef, ptr undef, align 4
+ store <5 x i8> undef, ptr undef, align 4
+ store <6 x i8> undef, ptr undef, align 4
+ store <7 x i8> undef, ptr undef, align 4
+ store <8 x i8> undef, ptr undef, align 4
+ store <9 x i8> undef, ptr undef, align 4
+ store <10 x i8> undef, ptr undef, align 4
+ store <11 x i8> undef, ptr undef, align 4
+ store <12 x i8> undef, ptr undef, align 4
+ store <13 x i8> undef, ptr undef, align 4
+ store <14 x i8> undef, ptr undef, align 4
+ store <15 x i8> undef, ptr undef, align 4
; <16 x i8> is XMM
- store <17 x i8> undef, <17 x i8>* undef, align 4
- store <18 x i8> undef, <18 x i8>* undef, align 4
- store <19 x i8> undef, <19 x i8>* undef, align 4
- store <20 x i8> undef, <20 x i8>* undef, align 4
- store <21 x i8> undef, <21 x i8>* undef, align 4
- store <22 x i8> undef, <22 x i8>* undef, align 4
- store <23 x i8> undef, <23 x i8>* undef, align 4
- store <24 x i8> undef, <24 x i8>* undef, align 4
- store <25 x i8> undef, <25 x i8>* undef, align 4
- store <26 x i8> undef, <26 x i8>* undef, align 4
- store <27 x i8> undef, <27 x i8>* undef, align 4
- store <28 x i8> undef, <28 x i8>* undef, align 4
- store <29 x i8> undef, <29 x i8>* undef, align 4
- store <30 x i8> undef, <30 x i8>* undef, align 4
- store <31 x i8> undef, <31 x i8>* undef, align 4
+ store <17 x i8> undef, ptr undef, align 4
+ store <18 x i8> undef, ptr undef, align 4
+ store <19 x i8> undef, ptr undef, align 4
+ store <20 x i8> undef, ptr undef, align 4
+ store <21 x i8> undef, ptr undef, align 4
+ store <22 x i8> undef, ptr undef, align 4
+ store <23 x i8> undef, ptr undef, align 4
+ store <24 x i8> undef, ptr undef, align 4
+ store <25 x i8> undef, ptr undef, align 4
+ store <26 x i8> undef, ptr undef, align 4
+ store <27 x i8> undef, ptr undef, align 4
+ store <28 x i8> undef, ptr undef, align 4
+ store <29 x i8> undef, ptr undef, align 4
+ store <30 x i8> undef, ptr undef, align 4
+ store <31 x i8> undef, ptr undef, align 4
; <32 x i8> is YMM
- store <33 x i8> undef, <33 x i8>* undef, align 4
- store <34 x i8> undef, <34 x i8>* undef, align 4
- store <35 x i8> undef, <35 x i8>* undef, align 4
- store <36 x i8> undef, <36 x i8>* undef, align 4
- store <37 x i8> undef, <37 x i8>* undef, align 4
- store <38 x i8> undef, <38 x i8>* undef, align 4
- store <39 x i8> undef, <39 x i8>* undef, align 4
- store <40 x i8> undef, <40 x i8>* undef, align 4
- store <41 x i8> undef, <41 x i8>* undef, align 4
- store <42 x i8> undef, <42 x i8>* undef, align 4
- store <43 x i8> undef, <43 x i8>* undef, align 4
- store <44 x i8> undef, <44 x i8>* undef, align 4
- store <45 x i8> undef, <45 x i8>* undef, align 4
- store <46 x i8> undef, <46 x i8>* undef, align 4
- store <47 x i8> undef, <47 x i8>* undef, align 4
- store <48 x i8> undef, <48 x i8>* undef, align 4
- store <49 x i8> undef, <49 x i8>* undef, align 4
- store <50 x i8> undef, <50 x i8>* undef, align 4
- store <51 x i8> undef, <51 x i8>* undef, align 4
- store <52 x i8> undef, <52 x i8>* undef, align 4
- store <53 x i8> undef, <53 x i8>* undef, align 4
- store <54 x i8> undef, <54 x i8>* undef, align 4
- store <55 x i8> undef, <55 x i8>* undef, align 4
- store <56 x i8> undef, <56 x i8>* undef, align 4
- store <57 x i8> undef, <57 x i8>* undef, align 4
- store <58 x i8> undef, <58 x i8>* undef, align 4
- store <59 x i8> undef, <59 x i8>* undef, align 4
- store <60 x i8> undef, <60 x i8>* undef, align 4
- store <61 x i8> undef, <61 x i8>* undef, align 4
- store <62 x i8> undef, <62 x i8>* undef, align 4
- store <63 x i8> undef, <63 x i8>* undef, align 4
+ store <33 x i8> undef, ptr undef, align 4
+ store <34 x i8> undef, ptr undef, align 4
+ store <35 x i8> undef, ptr undef, align 4
+ store <36 x i8> undef, ptr undef, align 4
+ store <37 x i8> undef, ptr undef, align 4
+ store <38 x i8> undef, ptr undef, align 4
+ store <39 x i8> undef, ptr undef, align 4
+ store <40 x i8> undef, ptr undef, align 4
+ store <41 x i8> undef, ptr undef, align 4
+ store <42 x i8> undef, ptr undef, align 4
+ store <43 x i8> undef, ptr undef, align 4
+ store <44 x i8> undef, ptr undef, align 4
+ store <45 x i8> undef, ptr undef, align 4
+ store <46 x i8> undef, ptr undef, align 4
+ store <47 x i8> undef, ptr undef, align 4
+ store <48 x i8> undef, ptr undef, align 4
+ store <49 x i8> undef, ptr undef, align 4
+ store <50 x i8> undef, ptr undef, align 4
+ store <51 x i8> undef, ptr undef, align 4
+ store <52 x i8> undef, ptr undef, align 4
+ store <53 x i8> undef, ptr undef, align 4
+ store <54 x i8> undef, ptr undef, align 4
+ store <55 x i8> undef, ptr undef, align 4
+ store <56 x i8> undef, ptr undef, align 4
+ store <57 x i8> undef, ptr undef, align 4
+ store <58 x i8> undef, ptr undef, align 4
+ store <59 x i8> undef, ptr undef, align 4
+ store <60 x i8> undef, ptr undef, align 4
+ store <61 x i8> undef, ptr undef, align 4
+ store <62 x i8> undef, ptr undef, align 4
+ store <63 x i8> undef, ptr undef, align 4
; <64 x i8> is ZMM
; Partial vectors with i1 elements
; <1 x i1> is XMM
; <2 x i1> is XMM
- store <3 x i1> undef, <3 x i1>* undef, align 4
+ store <3 x i1> undef, ptr undef, align 4
; <4 x i1> is XMM
- store <5 x i1> undef, <5 x i1>* undef, align 4
- store <6 x i1> undef, <6 x i1>* undef, align 4
- store <7 x i1> undef, <7 x i1>* undef, align 4
+ store <5 x i1> undef, ptr undef, align 4
+ store <6 x i1> undef, ptr undef, align 4
+ store <7 x i1> undef, ptr undef, align 4
; <8 x i1> is XMM
- store <9 x i1> undef, <9 x i1>* undef, align 4
- store <10 x i1> undef, <10 x i1>* undef, align 4
- store <11 x i1> undef, <11 x i1>* undef, align 4
- store <12 x i1> undef, <12 x i1>* undef, align 4
- store <13 x i1> undef, <13 x i1>* undef, align 4
- store <14 x i1> undef, <14 x i1>* undef, align 4
- store <15 x i1> undef, <15 x i1>* undef, align 4
+ store <9 x i1> undef, ptr undef, align 4
+ store <10 x i1> undef, ptr undef, align 4
+ store <11 x i1> undef, ptr undef, align 4
+ store <12 x i1> undef, ptr undef, align 4
+ store <13 x i1> undef, ptr undef, align 4
+ store <14 x i1> undef, ptr undef, align 4
+ store <15 x i1> undef, ptr undef, align 4
; <16 x i1> is XMM
- store <17 x i1> undef, <17 x i1>* undef, align 4
- store <18 x i1> undef, <18 x i1>* undef, align 4
- store <19 x i1> undef, <19 x i1>* undef, align 4
- store <20 x i1> undef, <20 x i1>* undef, align 4
- store <21 x i1> undef, <21 x i1>* undef, align 4
- store <22 x i1> undef, <22 x i1>* undef, align 4
- store <23 x i1> undef, <23 x i1>* undef, align 4
- store <24 x i1> undef, <24 x i1>* undef, align 4
- store <25 x i1> undef, <25 x i1>* undef, align 4
- store <26 x i1> undef, <26 x i1>* undef, align 4
- store <27 x i1> undef, <27 x i1>* undef, align 4
- store <28 x i1> undef, <28 x i1>* undef, align 4
- store <29 x i1> undef, <29 x i1>* undef, align 4
- store <30 x i1> undef, <30 x i1>* undef, align 4
- store <31 x i1> undef, <31 x i1>* undef, align 4
+ store <17 x i1> undef, ptr undef, align 4
+ store <18 x i1> undef, ptr undef, align 4
+ store <19 x i1> undef, ptr undef, align 4
+ store <20 x i1> undef, ptr undef, align 4
+ store <21 x i1> undef, ptr undef, align 4
+ store <22 x i1> undef, ptr undef, align 4
+ store <23 x i1> undef, ptr undef, align 4
+ store <24 x i1> undef, ptr undef, align 4
+ store <25 x i1> undef, ptr undef, align 4
+ store <26 x i1> undef, ptr undef, align 4
+ store <27 x i1> undef, ptr undef, align 4
+ store <28 x i1> undef, ptr undef, align 4
+ store <29 x i1> undef, ptr undef, align 4
+ store <30 x i1> undef, ptr undef, align 4
+ store <31 x i1> undef, ptr undef, align 4
; <32 x i1> is YMM
- store <33 x i1> undef, <33 x i1>* undef, align 4
- store <34 x i1> undef, <34 x i1>* undef, align 4
- store <35 x i1> undef, <35 x i1>* undef, align 4
- store <36 x i1> undef, <36 x i1>* undef, align 4
- store <37 x i1> undef, <37 x i1>* undef, align 4
- store <38 x i1> undef, <38 x i1>* undef, align 4
- store <39 x i1> undef, <39 x i1>* undef, align 4
- store <40 x i1> undef, <40 x i1>* undef, align 4
- store <41 x i1> undef, <41 x i1>* undef, align 4
- store <42 x i1> undef, <42 x i1>* undef, align 4
- store <43 x i1> undef, <43 x i1>* undef, align 4
- store <44 x i1> undef, <44 x i1>* undef, align 4
- store <45 x i1> undef, <45 x i1>* undef, align 4
- store <46 x i1> undef, <46 x i1>* undef, align 4
- store <47 x i1> undef, <47 x i1>* undef, align 4
- store <48 x i1> undef, <48 x i1>* undef, align 4
- store <49 x i1> undef, <49 x i1>* undef, align 4
- store <50 x i1> undef, <50 x i1>* undef, align 4
- store <51 x i1> undef, <51 x i1>* undef, align 4
- store <52 x i1> undef, <52 x i1>* undef, align 4
- store <53 x i1> undef, <53 x i1>* undef, align 4
- store <54 x i1> undef, <54 x i1>* undef, align 4
- store <55 x i1> undef, <55 x i1>* undef, align 4
- store <56 x i1> undef, <56 x i1>* undef, align 4
- store <57 x i1> undef, <57 x i1>* undef, align 4
- store <58 x i1> undef, <58 x i1>* undef, align 4
- store <59 x i1> undef, <59 x i1>* undef, align 4
- store <60 x i1> undef, <60 x i1>* undef, align 4
- store <61 x i1> undef, <61 x i1>* undef, align 4
- store <62 x i1> undef, <62 x i1>* undef, align 4
- store <63 x i1> undef, <63 x i1>* undef, align 4
+ store <33 x i1> undef, ptr undef, align 4
+ store <34 x i1> undef, ptr undef, align 4
+ store <35 x i1> undef, ptr undef, align 4
+ store <36 x i1> undef, ptr undef, align 4
+ store <37 x i1> undef, ptr undef, align 4
+ store <38 x i1> undef, ptr undef, align 4
+ store <39 x i1> undef, ptr undef, align 4
+ store <40 x i1> undef, ptr undef, align 4
+ store <41 x i1> undef, ptr undef, align 4
+ store <42 x i1> undef, ptr undef, align 4
+ store <43 x i1> undef, ptr undef, align 4
+ store <44 x i1> undef, ptr undef, align 4
+ store <45 x i1> undef, ptr undef, align 4
+ store <46 x i1> undef, ptr undef, align 4
+ store <47 x i1> undef, ptr undef, align 4
+ store <48 x i1> undef, ptr undef, align 4
+ store <49 x i1> undef, ptr undef, align 4
+ store <50 x i1> undef, ptr undef, align 4
+ store <51 x i1> undef, ptr undef, align 4
+ store <52 x i1> undef, ptr undef, align 4
+ store <53 x i1> undef, ptr undef, align 4
+ store <54 x i1> undef, ptr undef, align 4
+ store <55 x i1> undef, ptr undef, align 4
+ store <56 x i1> undef, ptr undef, align 4
+ store <57 x i1> undef, ptr undef, align 4
+ store <58 x i1> undef, ptr undef, align 4
+ store <59 x i1> undef, ptr undef, align 4
+ store <60 x i1> undef, ptr undef, align 4
+ store <61 x i1> undef, ptr undef, align 4
+ store <62 x i1> undef, ptr undef, align 4
+ store <63 x i1> undef, ptr undef, align 4
; <64 x i1> is ZMM
ret i32 undef
define i32 @stores_align1(i32 %arg) {
; Scalars
; SSE2-LABEL: 'stores_align1'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, i1* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, <128 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, <16 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, <8 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, <4 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, <2 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, <1 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, <256 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, <32 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <512 x i1> undef, <512 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, <8 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, <64 x i1>* undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ptr undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x ptr> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <512 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x ptr> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, ptr undef, align 1
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'stores_align1'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, i1* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, <128 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, <16 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, <8 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, <4 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, <2 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, <1 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, <256 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, <32 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <512 x i1> undef, <512 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, <8 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, <64 x i1>* undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ptr undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x ptr> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <512 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x ptr> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, ptr undef, align 1
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'stores_align1'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, i1* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, <128 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, <16 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, <8 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, <4 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, <2 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, <1 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <256 x i1> undef, <256 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, <32 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <512 x i1> undef, <512 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x float> undef, <16 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x double> undef, <8 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, <64 x i1>* undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ptr undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <256 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x ptr> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <512 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <64 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i64> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x double> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x ptr> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, ptr undef, align 1
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'stores_align1'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, i1* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <128 x i1> undef, <128 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, <16 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, <8 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, <4 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, <2 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, <1 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <256 x i1> undef, <256 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, <32 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <512 x i1> undef, <512 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x float> undef, <16 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x double> undef, <8 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, <64 x i1>* undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ptr undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <128 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <256 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x ptr> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <512 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i64> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x double> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x ptr> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, ptr undef, align 1
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- store i8 undef, i8* undef, align 1
- store i16 undef, i16* undef, align 1
- store i32 undef, i32* undef, align 1
- store i64 undef, i64* undef, align 1
- store i128 undef, i128* undef, align 1
- store i256 undef, i256* undef, align 1
- store i512 undef, i512* undef, align 1
+ store i8 undef, ptr undef, align 1
+ store i16 undef, ptr undef, align 1
+ store i32 undef, ptr undef, align 1
+ store i64 undef, ptr undef, align 1
+ store i128 undef, ptr undef, align 1
+ store i256 undef, ptr undef, align 1
+ store i512 undef, ptr undef, align 1
- store float undef, float* undef, align 1
- store double undef, double* undef, align 1
+ store float undef, ptr undef, align 1
+ store double undef, ptr undef, align 1
- store i8* undef, i8** undef, align 1
+ store ptr undef, ptr undef, align 1
- store i1 undef, i1* undef, align 1
+ store i1 undef, ptr undef, align 1
; XMM (128-bit) vectors
- store <128 x i1> undef, <128 x i1>* undef, align 1
- store <16 x i8> undef, <16 x i8>* undef, align 1
- store <8 x i16> undef, <8 x i16>* undef, align 1
- store <4 x i32> undef, <4 x i32>* undef, align 1
- store <2 x i64> undef, <2 x i64>* undef, align 1
- store <1 x i128> undef, <1 x i128>* undef, align 1
+ store <128 x i1> undef, ptr undef, align 1
+ store <16 x i8> undef, ptr undef, align 1
+ store <8 x i16> undef, ptr undef, align 1
+ store <4 x i32> undef, ptr undef, align 1
+ store <2 x i64> undef, ptr undef, align 1
+ store <1 x i128> undef, ptr undef, align 1
- store <4 x float> undef, <4 x float>* undef, align 1
- store <2 x double> undef, <2 x double>* undef, align 1
+ store <4 x float> undef, ptr undef, align 1
+ store <2 x double> undef, ptr undef, align 1
- store <2 x i8*> undef, <2 x i8*>* undef, align 1
+ store <2 x ptr> undef, ptr undef, align 1
- store <16 x i1> undef, <16 x i1>* undef, align 1
- store <8 x i1> undef, <8 x i1>* undef, align 1
- store <4 x i1> undef, <4 x i1>* undef, align 1
- store <2 x i1> undef, <2 x i1>* undef, align 1
- store <1 x i1> undef, <1 x i1>* undef, align 1
+ store <16 x i1> undef, ptr undef, align 1
+ store <8 x i1> undef, ptr undef, align 1
+ store <4 x i1> undef, ptr undef, align 1
+ store <2 x i1> undef, ptr undef, align 1
+ store <1 x i1> undef, ptr undef, align 1
; YMM (256-bit) vectors
- store <256 x i1> undef, <256 x i1>* undef, align 1
- store <32 x i8> undef, <32 x i8>* undef, align 1
- store <16 x i16> undef, <16 x i16>* undef, align 1
- store <8 x i32> undef, <8 x i32>* undef, align 1
- store <4 x i64> undef, <4 x i64>* undef, align 1
- store <2 x i128> undef, <2 x i128>* undef, align 1
- store <1 x i256> undef, <1 x i256>* undef, align 1
+ store <256 x i1> undef, ptr undef, align 1
+ store <32 x i8> undef, ptr undef, align 1
+ store <16 x i16> undef, ptr undef, align 1
+ store <8 x i32> undef, ptr undef, align 1
+ store <4 x i64> undef, ptr undef, align 1
+ store <2 x i128> undef, ptr undef, align 1
+ store <1 x i256> undef, ptr undef, align 1
- store <8 x float> undef, <8 x float>* undef, align 1
- store <4 x double> undef, <4 x double>* undef, align 1
+ store <8 x float> undef, ptr undef, align 1
+ store <4 x double> undef, ptr undef, align 1
- store <4 x i8*> undef, <4 x i8*>* undef, align 1
+ store <4 x ptr> undef, ptr undef, align 1
- store <32 x i1> undef, <32 x i1>* undef, align 1
+ store <32 x i1> undef, ptr undef, align 1
; ZMM (512-bit) vectors
- store <512 x i1> undef, <512 x i1>* undef, align 1
- store <64 x i8> undef, <64 x i8>* undef, align 1
- store <32 x i16> undef, <32 x i16>* undef, align 1
- store <16 x i32> undef, <16 x i32>* undef, align 1
- store <8 x i64> undef, <8 x i64>* undef, align 1
- store <4 x i128> undef, <4 x i128>* undef, align 1
- store <2 x i256> undef, <2 x i256>* undef, align 1
- store <1 x i512> undef, <1 x i512>* undef, align 1
+ store <512 x i1> undef, ptr undef, align 1
+ store <64 x i8> undef, ptr undef, align 1
+ store <32 x i16> undef, ptr undef, align 1
+ store <16 x i32> undef, ptr undef, align 1
+ store <8 x i64> undef, ptr undef, align 1
+ store <4 x i128> undef, ptr undef, align 1
+ store <2 x i256> undef, ptr undef, align 1
+ store <1 x i512> undef, ptr undef, align 1
- store <16 x float> undef, <16 x float>* undef, align 1
- store <8 x double> undef, <8 x double>* undef, align 1
+ store <16 x float> undef, ptr undef, align 1
+ store <8 x double> undef, ptr undef, align 1
- store <8 x i8*> undef, <8 x i8*>* undef, align 1
+ store <8 x ptr> undef, ptr undef, align 1
- store <64 x i1> undef, <64 x i1>* undef, align 1
+ store <64 x i1> undef, ptr undef, align 1
ret i32 undef
}
define i32 @stores_partial_align1(i32 %arg) {
; Partial vectors with i64 elements (doubles as pointer-sized tests))
; SSE2-LABEL: 'stores_partial_align1'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, <5 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, <7 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, <3 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, <5 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, <6 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, <7 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i1> undef, <9 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i1> undef, <10 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i1> undef, <11 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i1> undef, <12 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i1> undef, <13 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i1> undef, <14 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <15 x i1> undef, <15 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, <17 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, <18 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, <19 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, <20 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, <21 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, <22 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, <23 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, <24 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, <25 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, <26 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, <27 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, <28 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, <29 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, <30 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, <31 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, <33 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, <34 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, <35 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, <36 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, <37 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, <38 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, <39 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, <40 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, <41 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, <42 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, <43 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, <44 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, <45 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, <46 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, <47 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, <48 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, <49 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, <50 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, <51 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, <52 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, <53 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, <54 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, <55 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, <56 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, <57 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, <58 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, <59 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, <60 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, <61 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, <62 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, <63 x i1>* undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <13 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <31 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <43 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <44 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <45 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <61 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <15 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, ptr undef, align 1
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'stores_partial_align1'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, <5 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, <7 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, <3 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, <5 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, <6 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, <7 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, <9 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, <10 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, <11 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, <12 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, <13 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, <14 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, <15 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, <17 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, <18 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, <19 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, <20 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, <21 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, <22 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, <23 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, <24 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i1> undef, <25 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i1> undef, <26 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <27 x i1> undef, <27 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i1> undef, <28 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <29 x i1> undef, <29 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <30 x i1> undef, <30 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <31 x i1> undef, <31 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, <33 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, <34 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, <35 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, <36 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, <37 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, <38 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, <39 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, <40 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, <41 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, <42 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, <43 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, <44 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, <45 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, <46 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, <47 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, <48 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, <49 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, <50 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, <51 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, <52 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, <53 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, <54 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, <55 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, <56 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <57 x i1> undef, <57 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <58 x i1> undef, <58 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <59 x i1> undef, <59 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <60 x i1> undef, <60 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <61 x i1> undef, <61 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <62 x i1> undef, <62 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <63 x i1> undef, <63 x i1>* undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <19 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <21 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <39 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <55 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <27 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <29 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <30 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <31 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <57 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <58 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <59 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <60 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <61 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <62 x i1> undef, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <63 x i1> undef, ptr undef, align 1
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'stores_partial_align1'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x double> undef, <5 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x double> undef, <6 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, <5 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x float> undef, <6 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, <7 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x float> undef, <9 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x float> undef, <10 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x float> undef, <11 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x float> undef, <12 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, <3 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, <5 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, <6 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, <7 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, <9 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, <10 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, <11 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, <12 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, <13 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, <14 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, <15 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, <17 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, <18 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, <19 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, <20 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, <21 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, <22 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, <23 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, <24 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, <25 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, <26 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, <27 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, <28 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, <29 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, <30 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, <31 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, <33 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, <34 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, <35 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, <36 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, <37 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, <38 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, <39 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, <40 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, <41 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, <42 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, <43 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, <44 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, <45 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, <46 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, <47 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, <48 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i1> undef, <49 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i1> undef, <50 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <51 x i1> undef, <51 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i1> undef, <52 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <53 x i1> undef, <53 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <54 x i1> undef, <54 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <55 x i1> undef, <55 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i1> undef, <56 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, <57 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, <58 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, <59 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, <60 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, <61 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, <62 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, <63 x i1>* undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i64> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i64> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x double> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x double> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <19 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <21 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <35 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <37 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <46 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <52 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <51 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <53 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <54 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <55 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, ptr undef, align 1
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'stores_partial_align1'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x double> undef, <7 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, <5 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x float> undef, <6 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, <7 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x float> undef, <13 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x float> undef, <14 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x float> undef, <15 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, <3 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, <5 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, <6 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, <7 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, <9 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, <10 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, <11 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, <12 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, <13 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, <14 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, <15 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, <17 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, <18 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, <19 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, <20 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, <21 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, <22 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, <23 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, <24 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, <25 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, <26 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, <27 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, <28 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, <29 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, <30 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, <31 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, <33 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, <34 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, <35 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, <36 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, <37 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, <38 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, <39 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, <40 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, <41 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, <42 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, <43 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, <44 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, <45 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, <46 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, <47 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, <48 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, <49 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, <50 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, <51 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, <52 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, <53 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, <54 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, <55 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, <56 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i1> undef, <57 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <58 x i1> undef, <58 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <59 x i1> undef, <59 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i1> undef, <60 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <61 x i1> undef, <61 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <62 x i1> undef, <62 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <63 x i1> undef, <63 x i1>* undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x double> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <13 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x float> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <26 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <27 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <36 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <49 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <50 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <51 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <52 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <54 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <55 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <56 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <63 x i8> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <58 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <59 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <61 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <62 x i1> undef, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <63 x i1> undef, ptr undef, align 1
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- store <1 x i64> undef, <1 x i64>* undef, align 1
+ store <1 x i64> undef, ptr undef, align 1
; <2 x i64> is XMM
- store <3 x i64> undef, <3 x i64>* undef, align 1
+ store <3 x i64> undef, ptr undef, align 1
; <4 x i64> is YMM
- store <5 x i64> undef, <5 x i64>* undef, align 1
- store <6 x i64> undef, <6 x i64>* undef, align 1
- store <7 x i64> undef, <7 x i64>* undef, align 1
+ store <5 x i64> undef, ptr undef, align 1
+ store <6 x i64> undef, ptr undef, align 1
+ store <7 x i64> undef, ptr undef, align 1
; <8 x i64> is ZMM
; Partial vectors with double elements
- store <1 x double> undef, <1 x double>* undef, align 1
+ store <1 x double> undef, ptr undef, align 1
; <2 x double> is XMM
- store <3 x double> undef, <3 x double>* undef, align 1
+ store <3 x double> undef, ptr undef, align 1
; <4 x double> is YMM
- store <5 x double> undef, <5 x double>* undef, align 1
- store <6 x double> undef, <6 x double>* undef, align 1
- store <7 x double> undef, <7 x double>* undef, align 1
+ store <5 x double> undef, ptr undef, align 1
+ store <6 x double> undef, ptr undef, align 1
+ store <7 x double> undef, ptr undef, align 1
; <8 x double> is ZMM
; Partial vectors with i32 elements
- store <1 x i32> undef, <1 x i32>* undef, align 1
- store <2 x i32> undef, <2 x i32>* undef, align 1
- store <3 x i32> undef, <3 x i32>* undef, align 1
+ store <1 x i32> undef, ptr undef, align 1
+ store <2 x i32> undef, ptr undef, align 1
+ store <3 x i32> undef, ptr undef, align 1
; <4 x i32> is XMM
- store <5 x i32> undef, <5 x i32>* undef, align 1
- store <6 x i32> undef, <6 x i32>* undef, align 1
- store <7 x i32> undef, <7 x i32>* undef, align 1
+ store <5 x i32> undef, ptr undef, align 1
+ store <6 x i32> undef, ptr undef, align 1
+ store <7 x i32> undef, ptr undef, align 1
; <8 x i32> is YMM
- store <9 x i32> undef, <9 x i32>* undef, align 1
- store <10 x i32> undef, <10 x i32>* undef, align 1
- store <11 x i32> undef, <11 x i32>* undef, align 1
- store <12 x i32> undef, <12 x i32>* undef, align 1
- store <13 x i32> undef, <13 x i32>* undef, align 1
- store <14 x i32> undef, <14 x i32>* undef, align 1
- store <15 x i32> undef, <15 x i32>* undef, align 1
+ store <9 x i32> undef, ptr undef, align 1
+ store <10 x i32> undef, ptr undef, align 1
+ store <11 x i32> undef, ptr undef, align 1
+ store <12 x i32> undef, ptr undef, align 1
+ store <13 x i32> undef, ptr undef, align 1
+ store <14 x i32> undef, ptr undef, align 1
+ store <15 x i32> undef, ptr undef, align 1
; <16 x i32> is ZMM
; Partial vectors with float elements
- store <1 x float> undef, <1 x float>* undef, align 1
- store <2 x float> undef, <2 x float>* undef, align 1
- store <3 x float> undef, <3 x float>* undef, align 1
+ store <1 x float> undef, ptr undef, align 1
+ store <2 x float> undef, ptr undef, align 1
+ store <3 x float> undef, ptr undef, align 1
; <4 x float> is XMM
- store <5 x float> undef, <5 x float>* undef, align 1
- store <6 x float> undef, <6 x float>* undef, align 1
- store <7 x float> undef, <7 x float>* undef, align 1
+ store <5 x float> undef, ptr undef, align 1
+ store <6 x float> undef, ptr undef, align 1
+ store <7 x float> undef, ptr undef, align 1
; <8 x float> is YMM
- store <9 x float> undef, <9 x float>* undef, align 1
- store <10 x float> undef, <10 x float>* undef, align 1
- store <11 x float> undef, <11 x float>* undef, align 1
- store <12 x float> undef, <12 x float>* undef, align 1
- store <13 x float> undef, <13 x float>* undef, align 1
- store <14 x float> undef, <14 x float>* undef, align 1
- store <15 x float> undef, <15 x float>* undef, align 1
+ store <9 x float> undef, ptr undef, align 1
+ store <10 x float> undef, ptr undef, align 1
+ store <11 x float> undef, ptr undef, align 1
+ store <12 x float> undef, ptr undef, align 1
+ store <13 x float> undef, ptr undef, align 1
+ store <14 x float> undef, ptr undef, align 1
+ store <15 x float> undef, ptr undef, align 1
; <16 x float> is ZMM
; Partial vectors with i16 elements
- store <1 x i16> undef, <1 x i16>* undef, align 1
- store <2 x i16> undef, <2 x i16>* undef, align 1
- store <3 x i16> undef, <3 x i16>* undef, align 1
- store <4 x i16> undef, <4 x i16>* undef, align 1
- store <5 x i16> undef, <5 x i16>* undef, align 1
- store <6 x i16> undef, <6 x i16>* undef, align 1
- store <7 x i16> undef, <7 x i16>* undef, align 1
+ store <1 x i16> undef, ptr undef, align 1
+ store <2 x i16> undef, ptr undef, align 1
+ store <3 x i16> undef, ptr undef, align 1
+ store <4 x i16> undef, ptr undef, align 1
+ store <5 x i16> undef, ptr undef, align 1
+ store <6 x i16> undef, ptr undef, align 1
+ store <7 x i16> undef, ptr undef, align 1
; <8 x i16> is XMM
- store <9 x i16> undef, <9 x i16>* undef, align 1
- store <10 x i16> undef, <10 x i16>* undef, align 1
- store <11 x i16> undef, <11 x i16>* undef, align 1
- store <12 x i16> undef, <12 x i16>* undef, align 1
- store <13 x i16> undef, <13 x i16>* undef, align 1
- store <14 x i16> undef, <14 x i16>* undef, align 1
- store <15 x i16> undef, <15 x i16>* undef, align 1
+ store <9 x i16> undef, ptr undef, align 1
+ store <10 x i16> undef, ptr undef, align 1
+ store <11 x i16> undef, ptr undef, align 1
+ store <12 x i16> undef, ptr undef, align 1
+ store <13 x i16> undef, ptr undef, align 1
+ store <14 x i16> undef, ptr undef, align 1
+ store <15 x i16> undef, ptr undef, align 1
; <16 x i16> is YMM
- store <17 x i16> undef, <17 x i16>* undef, align 1
- store <18 x i16> undef, <18 x i16>* undef, align 1
- store <19 x i16> undef, <19 x i16>* undef, align 1
- store <20 x i16> undef, <20 x i16>* undef, align 1
- store <21 x i16> undef, <21 x i16>* undef, align 1
- store <22 x i16> undef, <22 x i16>* undef, align 1
- store <23 x i16> undef, <23 x i16>* undef, align 1
- store <24 x i16> undef, <24 x i16>* undef, align 1
- store <25 x i16> undef, <25 x i16>* undef, align 1
- store <26 x i16> undef, <26 x i16>* undef, align 1
- store <27 x i16> undef, <27 x i16>* undef, align 1
- store <28 x i16> undef, <28 x i16>* undef, align 1
- store <29 x i16> undef, <29 x i16>* undef, align 1
- store <30 x i16> undef, <30 x i16>* undef, align 1
- store <31 x i16> undef, <31 x i16>* undef, align 1
+ store <17 x i16> undef, ptr undef, align 1
+ store <18 x i16> undef, ptr undef, align 1
+ store <19 x i16> undef, ptr undef, align 1
+ store <20 x i16> undef, ptr undef, align 1
+ store <21 x i16> undef, ptr undef, align 1
+ store <22 x i16> undef, ptr undef, align 1
+ store <23 x i16> undef, ptr undef, align 1
+ store <24 x i16> undef, ptr undef, align 1
+ store <25 x i16> undef, ptr undef, align 1
+ store <26 x i16> undef, ptr undef, align 1
+ store <27 x i16> undef, ptr undef, align 1
+ store <28 x i16> undef, ptr undef, align 1
+ store <29 x i16> undef, ptr undef, align 1
+ store <30 x i16> undef, ptr undef, align 1
+ store <31 x i16> undef, ptr undef, align 1
; <32 x i16> is ZMM
; Partial vectors with i8 elements
- store <1 x i8> undef, <1 x i8>* undef, align 1
- store <2 x i8> undef, <2 x i8>* undef, align 1
- store <3 x i8> undef, <3 x i8>* undef, align 1
- store <4 x i8> undef, <4 x i8>* undef, align 1
- store <5 x i8> undef, <5 x i8>* undef, align 1
- store <6 x i8> undef, <6 x i8>* undef, align 1
- store <7 x i8> undef, <7 x i8>* undef, align 1
- store <8 x i8> undef, <8 x i8>* undef, align 1
- store <9 x i8> undef, <9 x i8>* undef, align 1
- store <10 x i8> undef, <10 x i8>* undef, align 1
- store <11 x i8> undef, <11 x i8>* undef, align 1
- store <12 x i8> undef, <12 x i8>* undef, align 1
- store <13 x i8> undef, <13 x i8>* undef, align 1
- store <14 x i8> undef, <14 x i8>* undef, align 1
- store <15 x i8> undef, <15 x i8>* undef, align 1
+ store <1 x i8> undef, ptr undef, align 1
+ store <2 x i8> undef, ptr undef, align 1
+ store <3 x i8> undef, ptr undef, align 1
+ store <4 x i8> undef, ptr undef, align 1
+ store <5 x i8> undef, ptr undef, align 1
+ store <6 x i8> undef, ptr undef, align 1
+ store <7 x i8> undef, ptr undef, align 1
+ store <8 x i8> undef, ptr undef, align 1
+ store <9 x i8> undef, ptr undef, align 1
+ store <10 x i8> undef, ptr undef, align 1
+ store <11 x i8> undef, ptr undef, align 1
+ store <12 x i8> undef, ptr undef, align 1
+ store <13 x i8> undef, ptr undef, align 1
+ store <14 x i8> undef, ptr undef, align 1
+ store <15 x i8> undef, ptr undef, align 1
; <16 x i8> is XMM
- store <17 x i8> undef, <17 x i8>* undef, align 1
- store <18 x i8> undef, <18 x i8>* undef, align 1
- store <19 x i8> undef, <19 x i8>* undef, align 1
- store <20 x i8> undef, <20 x i8>* undef, align 1
- store <21 x i8> undef, <21 x i8>* undef, align 1
- store <22 x i8> undef, <22 x i8>* undef, align 1
- store <23 x i8> undef, <23 x i8>* undef, align 1
- store <24 x i8> undef, <24 x i8>* undef, align 1
- store <25 x i8> undef, <25 x i8>* undef, align 1
- store <26 x i8> undef, <26 x i8>* undef, align 1
- store <27 x i8> undef, <27 x i8>* undef, align 1
- store <28 x i8> undef, <28 x i8>* undef, align 1
- store <29 x i8> undef, <29 x i8>* undef, align 1
- store <30 x i8> undef, <30 x i8>* undef, align 1
- store <31 x i8> undef, <31 x i8>* undef, align 1
+ store <17 x i8> undef, ptr undef, align 1
+ store <18 x i8> undef, ptr undef, align 1
+ store <19 x i8> undef, ptr undef, align 1
+ store <20 x i8> undef, ptr undef, align 1
+ store <21 x i8> undef, ptr undef, align 1
+ store <22 x i8> undef, ptr undef, align 1
+ store <23 x i8> undef, ptr undef, align 1
+ store <24 x i8> undef, ptr undef, align 1
+ store <25 x i8> undef, ptr undef, align 1
+ store <26 x i8> undef, ptr undef, align 1
+ store <27 x i8> undef, ptr undef, align 1
+ store <28 x i8> undef, ptr undef, align 1
+ store <29 x i8> undef, ptr undef, align 1
+ store <30 x i8> undef, ptr undef, align 1
+ store <31 x i8> undef, ptr undef, align 1
; <32 x i8> is YMM
- store <33 x i8> undef, <33 x i8>* undef, align 1
- store <34 x i8> undef, <34 x i8>* undef, align 1
- store <35 x i8> undef, <35 x i8>* undef, align 1
- store <36 x i8> undef, <36 x i8>* undef, align 1
- store <37 x i8> undef, <37 x i8>* undef, align 1
- store <38 x i8> undef, <38 x i8>* undef, align 1
- store <39 x i8> undef, <39 x i8>* undef, align 1
- store <40 x i8> undef, <40 x i8>* undef, align 1
- store <41 x i8> undef, <41 x i8>* undef, align 1
- store <42 x i8> undef, <42 x i8>* undef, align 1
- store <43 x i8> undef, <43 x i8>* undef, align 1
- store <44 x i8> undef, <44 x i8>* undef, align 1
- store <45 x i8> undef, <45 x i8>* undef, align 1
- store <46 x i8> undef, <46 x i8>* undef, align 1
- store <47 x i8> undef, <47 x i8>* undef, align 1
- store <48 x i8> undef, <48 x i8>* undef, align 1
- store <49 x i8> undef, <49 x i8>* undef, align 1
- store <50 x i8> undef, <50 x i8>* undef, align 1
- store <51 x i8> undef, <51 x i8>* undef, align 1
- store <52 x i8> undef, <52 x i8>* undef, align 1
- store <53 x i8> undef, <53 x i8>* undef, align 1
- store <54 x i8> undef, <54 x i8>* undef, align 1
- store <55 x i8> undef, <55 x i8>* undef, align 1
- store <56 x i8> undef, <56 x i8>* undef, align 1
- store <57 x i8> undef, <57 x i8>* undef, align 1
- store <58 x i8> undef, <58 x i8>* undef, align 1
- store <59 x i8> undef, <59 x i8>* undef, align 1
- store <60 x i8> undef, <60 x i8>* undef, align 1
- store <61 x i8> undef, <61 x i8>* undef, align 1
- store <62 x i8> undef, <62 x i8>* undef, align 1
- store <63 x i8> undef, <63 x i8>* undef, align 1
+ store <33 x i8> undef, ptr undef, align 1
+ store <34 x i8> undef, ptr undef, align 1
+ store <35 x i8> undef, ptr undef, align 1
+ store <36 x i8> undef, ptr undef, align 1
+ store <37 x i8> undef, ptr undef, align 1
+ store <38 x i8> undef, ptr undef, align 1
+ store <39 x i8> undef, ptr undef, align 1
+ store <40 x i8> undef, ptr undef, align 1
+ store <41 x i8> undef, ptr undef, align 1
+ store <42 x i8> undef, ptr undef, align 1
+ store <43 x i8> undef, ptr undef, align 1
+ store <44 x i8> undef, ptr undef, align 1
+ store <45 x i8> undef, ptr undef, align 1
+ store <46 x i8> undef, ptr undef, align 1
+ store <47 x i8> undef, ptr undef, align 1
+ store <48 x i8> undef, ptr undef, align 1
+ store <49 x i8> undef, ptr undef, align 1
+ store <50 x i8> undef, ptr undef, align 1
+ store <51 x i8> undef, ptr undef, align 1
+ store <52 x i8> undef, ptr undef, align 1
+ store <53 x i8> undef, ptr undef, align 1
+ store <54 x i8> undef, ptr undef, align 1
+ store <55 x i8> undef, ptr undef, align 1
+ store <56 x i8> undef, ptr undef, align 1
+ store <57 x i8> undef, ptr undef, align 1
+ store <58 x i8> undef, ptr undef, align 1
+ store <59 x i8> undef, ptr undef, align 1
+ store <60 x i8> undef, ptr undef, align 1
+ store <61 x i8> undef, ptr undef, align 1
+ store <62 x i8> undef, ptr undef, align 1
+ store <63 x i8> undef, ptr undef, align 1
; <64 x i8> is ZMM
; Partial vectors with i1 elements
; <1 x i1> is XMM
; <2 x i1> is XMM
- store <3 x i1> undef, <3 x i1>* undef, align 1
+ store <3 x i1> undef, ptr undef, align 1
; <4 x i1> is XMM
- store <5 x i1> undef, <5 x i1>* undef, align 1
- store <6 x i1> undef, <6 x i1>* undef, align 1
- store <7 x i1> undef, <7 x i1>* undef, align 1
+ store <5 x i1> undef, ptr undef, align 1
+ store <6 x i1> undef, ptr undef, align 1
+ store <7 x i1> undef, ptr undef, align 1
; <8 x i1> is XMM
- store <9 x i1> undef, <9 x i1>* undef, align 1
- store <10 x i1> undef, <10 x i1>* undef, align 1
- store <11 x i1> undef, <11 x i1>* undef, align 1
- store <12 x i1> undef, <12 x i1>* undef, align 1
- store <13 x i1> undef, <13 x i1>* undef, align 1
- store <14 x i1> undef, <14 x i1>* undef, align 1
- store <15 x i1> undef, <15 x i1>* undef, align 1
+ store <9 x i1> undef, ptr undef, align 1
+ store <10 x i1> undef, ptr undef, align 1
+ store <11 x i1> undef, ptr undef, align 1
+ store <12 x i1> undef, ptr undef, align 1
+ store <13 x i1> undef, ptr undef, align 1
+ store <14 x i1> undef, ptr undef, align 1
+ store <15 x i1> undef, ptr undef, align 1
; <16 x i1> is XMM
- store <17 x i1> undef, <17 x i1>* undef, align 1
- store <18 x i1> undef, <18 x i1>* undef, align 1
- store <19 x i1> undef, <19 x i1>* undef, align 1
- store <20 x i1> undef, <20 x i1>* undef, align 1
- store <21 x i1> undef, <21 x i1>* undef, align 1
- store <22 x i1> undef, <22 x i1>* undef, align 1
- store <23 x i1> undef, <23 x i1>* undef, align 1
- store <24 x i1> undef, <24 x i1>* undef, align 1
- store <25 x i1> undef, <25 x i1>* undef, align 1
- store <26 x i1> undef, <26 x i1>* undef, align 1
- store <27 x i1> undef, <27 x i1>* undef, align 1
- store <28 x i1> undef, <28 x i1>* undef, align 1
- store <29 x i1> undef, <29 x i1>* undef, align 1
- store <30 x i1> undef, <30 x i1>* undef, align 1
- store <31 x i1> undef, <31 x i1>* undef, align 1
+ store <17 x i1> undef, ptr undef, align 1
+ store <18 x i1> undef, ptr undef, align 1
+ store <19 x i1> undef, ptr undef, align 1
+ store <20 x i1> undef, ptr undef, align 1
+ store <21 x i1> undef, ptr undef, align 1
+ store <22 x i1> undef, ptr undef, align 1
+ store <23 x i1> undef, ptr undef, align 1
+ store <24 x i1> undef, ptr undef, align 1
+ store <25 x i1> undef, ptr undef, align 1
+ store <26 x i1> undef, ptr undef, align 1
+ store <27 x i1> undef, ptr undef, align 1
+ store <28 x i1> undef, ptr undef, align 1
+ store <29 x i1> undef, ptr undef, align 1
+ store <30 x i1> undef, ptr undef, align 1
+ store <31 x i1> undef, ptr undef, align 1
; <32 x i1> is YMM
- store <33 x i1> undef, <33 x i1>* undef, align 1
- store <34 x i1> undef, <34 x i1>* undef, align 1
- store <35 x i1> undef, <35 x i1>* undef, align 1
- store <36 x i1> undef, <36 x i1>* undef, align 1
- store <37 x i1> undef, <37 x i1>* undef, align 1
- store <38 x i1> undef, <38 x i1>* undef, align 1
- store <39 x i1> undef, <39 x i1>* undef, align 1
- store <40 x i1> undef, <40 x i1>* undef, align 1
- store <41 x i1> undef, <41 x i1>* undef, align 1
- store <42 x i1> undef, <42 x i1>* undef, align 1
- store <43 x i1> undef, <43 x i1>* undef, align 1
- store <44 x i1> undef, <44 x i1>* undef, align 1
- store <45 x i1> undef, <45 x i1>* undef, align 1
- store <46 x i1> undef, <46 x i1>* undef, align 1
- store <47 x i1> undef, <47 x i1>* undef, align 1
- store <48 x i1> undef, <48 x i1>* undef, align 1
- store <49 x i1> undef, <49 x i1>* undef, align 1
- store <50 x i1> undef, <50 x i1>* undef, align 1
- store <51 x i1> undef, <51 x i1>* undef, align 1
- store <52 x i1> undef, <52 x i1>* undef, align 1
- store <53 x i1> undef, <53 x i1>* undef, align 1
- store <54 x i1> undef, <54 x i1>* undef, align 1
- store <55 x i1> undef, <55 x i1>* undef, align 1
- store <56 x i1> undef, <56 x i1>* undef, align 1
- store <57 x i1> undef, <57 x i1>* undef, align 1
- store <58 x i1> undef, <58 x i1>* undef, align 1
- store <59 x i1> undef, <59 x i1>* undef, align 1
- store <60 x i1> undef, <60 x i1>* undef, align 1
- store <61 x i1> undef, <61 x i1>* undef, align 1
- store <62 x i1> undef, <62 x i1>* undef, align 1
- store <63 x i1> undef, <63 x i1>* undef, align 1
+ store <33 x i1> undef, ptr undef, align 1
+ store <34 x i1> undef, ptr undef, align 1
+ store <35 x i1> undef, ptr undef, align 1
+ store <36 x i1> undef, ptr undef, align 1
+ store <37 x i1> undef, ptr undef, align 1
+ store <38 x i1> undef, ptr undef, align 1
+ store <39 x i1> undef, ptr undef, align 1
+ store <40 x i1> undef, ptr undef, align 1
+ store <41 x i1> undef, ptr undef, align 1
+ store <42 x i1> undef, ptr undef, align 1
+ store <43 x i1> undef, ptr undef, align 1
+ store <44 x i1> undef, ptr undef, align 1
+ store <45 x i1> undef, ptr undef, align 1
+ store <46 x i1> undef, ptr undef, align 1
+ store <47 x i1> undef, ptr undef, align 1
+ store <48 x i1> undef, ptr undef, align 1
+ store <49 x i1> undef, ptr undef, align 1
+ store <50 x i1> undef, ptr undef, align 1
+ store <51 x i1> undef, ptr undef, align 1
+ store <52 x i1> undef, ptr undef, align 1
+ store <53 x i1> undef, ptr undef, align 1
+ store <54 x i1> undef, ptr undef, align 1
+ store <55 x i1> undef, ptr undef, align 1
+ store <56 x i1> undef, ptr undef, align 1
+ store <57 x i1> undef, ptr undef, align 1
+ store <58 x i1> undef, ptr undef, align 1
+ store <59 x i1> undef, ptr undef, align 1
+ store <60 x i1> undef, ptr undef, align 1
+ store <61 x i1> undef, ptr undef, align 1
+ store <62 x i1> undef, ptr undef, align 1
+ store <63 x i1> undef, ptr undef, align 1
; <64 x i1> is ZMM
ret i32 undef
define i32 @stores_align64(i32 %arg) {
; Scalars
; SSE2-LABEL: 'stores_align64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, i1* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, <128 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, <16 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, <8 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, <4 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, <2 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, <1 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, <256 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, <32 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <512 x i1> undef, <512 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, <8 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, <64 x i1>* undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ptr undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x ptr> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <512 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x ptr> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, ptr undef, align 64
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'stores_align64'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, i1* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, <128 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, <16 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, <8 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, <4 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, <2 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, <1 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, <256 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, <32 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <512 x i1> undef, <512 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, <16 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, <8 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, <64 x i1>* undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ptr undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <256 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x ptr> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <512 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x double> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x ptr> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, ptr undef, align 64
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'stores_align64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, i1* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, <128 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, <16 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, <8 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, <4 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, <2 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, <1 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <256 x i1> undef, <256 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, <32 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <512 x i1> undef, <512 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x float> undef, <16 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x double> undef, <8 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, <64 x i1>* undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ptr undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <128 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <256 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x ptr> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <512 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <64 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i64> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x double> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x ptr> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, ptr undef, align 64
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'stores_align64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, i16* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, i32* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, i64* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, i128* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, i256* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, i512* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8* undef, i8** undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, i1* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <128 x i1> undef, <128 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, <1 x i128>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, <2 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8*> undef, <2 x i8*>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, <16 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, <8 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, <4 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, <2 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, <1 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <256 x i1> undef, <256 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, <2 x i128>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, <1 x i256>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8*> undef, <4 x i8*>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, <32 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <512 x i1> undef, <512 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i8> undef, <64 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i16> undef, <32 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i32> undef, <16 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i64> undef, <8 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, <4 x i128>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, <2 x i256>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, <1 x i512>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x float> undef, <16 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x double> undef, <8 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8*> undef, <8 x i8*>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, <64 x i1>* undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i256 undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store i512 undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store ptr undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i1 undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <128 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <1 x i128> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x double> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x ptr> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <256 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x i128> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <1 x i256> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x ptr> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <512 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i64> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <4 x i128> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <2 x i256> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <1 x i512> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x double> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x ptr> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <64 x i1> undef, ptr undef, align 64
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- store i8 undef, i8* undef, align 64
- store i16 undef, i16* undef, align 64
- store i32 undef, i32* undef, align 64
- store i64 undef, i64* undef, align 64
- store i128 undef, i128* undef, align 64
- store i256 undef, i256* undef, align 64
- store i512 undef, i512* undef, align 64
+ store i8 undef, ptr undef, align 64
+ store i16 undef, ptr undef, align 64
+ store i32 undef, ptr undef, align 64
+ store i64 undef, ptr undef, align 64
+ store i128 undef, ptr undef, align 64
+ store i256 undef, ptr undef, align 64
+ store i512 undef, ptr undef, align 64
- store float undef, float* undef, align 64
- store double undef, double* undef, align 64
+ store float undef, ptr undef, align 64
+ store double undef, ptr undef, align 64
- store i8* undef, i8** undef, align 64
+ store ptr undef, ptr undef, align 64
- store i1 undef, i1* undef, align 64
+ store i1 undef, ptr undef, align 64
; XMM (128-bit) vectors
- store <128 x i1> undef, <128 x i1>* undef, align 64
- store <16 x i8> undef, <16 x i8>* undef, align 64
- store <8 x i16> undef, <8 x i16>* undef, align 64
- store <4 x i32> undef, <4 x i32>* undef, align 64
- store <2 x i64> undef, <2 x i64>* undef, align 64
- store <1 x i128> undef, <1 x i128>* undef, align 64
+ store <128 x i1> undef, ptr undef, align 64
+ store <16 x i8> undef, ptr undef, align 64
+ store <8 x i16> undef, ptr undef, align 64
+ store <4 x i32> undef, ptr undef, align 64
+ store <2 x i64> undef, ptr undef, align 64
+ store <1 x i128> undef, ptr undef, align 64
- store <4 x float> undef, <4 x float>* undef, align 64
- store <2 x double> undef, <2 x double>* undef, align 64
+ store <4 x float> undef, ptr undef, align 64
+ store <2 x double> undef, ptr undef, align 64
- store <2 x i8*> undef, <2 x i8*>* undef, align 64
+ store <2 x ptr> undef, ptr undef, align 64
- store <16 x i1> undef, <16 x i1>* undef, align 64
- store <8 x i1> undef, <8 x i1>* undef, align 64
- store <4 x i1> undef, <4 x i1>* undef, align 64
- store <2 x i1> undef, <2 x i1>* undef, align 64
- store <1 x i1> undef, <1 x i1>* undef, align 64
+ store <16 x i1> undef, ptr undef, align 64
+ store <8 x i1> undef, ptr undef, align 64
+ store <4 x i1> undef, ptr undef, align 64
+ store <2 x i1> undef, ptr undef, align 64
+ store <1 x i1> undef, ptr undef, align 64
; YMM (256-bit) vectors
- store <256 x i1> undef, <256 x i1>* undef, align 64
- store <32 x i8> undef, <32 x i8>* undef, align 64
- store <16 x i16> undef, <16 x i16>* undef, align 64
- store <8 x i32> undef, <8 x i32>* undef, align 64
- store <4 x i64> undef, <4 x i64>* undef, align 64
- store <2 x i128> undef, <2 x i128>* undef, align 64
- store <1 x i256> undef, <1 x i256>* undef, align 64
+ store <256 x i1> undef, ptr undef, align 64
+ store <32 x i8> undef, ptr undef, align 64
+ store <16 x i16> undef, ptr undef, align 64
+ store <8 x i32> undef, ptr undef, align 64
+ store <4 x i64> undef, ptr undef, align 64
+ store <2 x i128> undef, ptr undef, align 64
+ store <1 x i256> undef, ptr undef, align 64
- store <8 x float> undef, <8 x float>* undef, align 64
- store <4 x double> undef, <4 x double>* undef, align 64
+ store <8 x float> undef, ptr undef, align 64
+ store <4 x double> undef, ptr undef, align 64
- store <4 x i8*> undef, <4 x i8*>* undef, align 64
+ store <4 x ptr> undef, ptr undef, align 64
- store <32 x i1> undef, <32 x i1>* undef, align 64
+ store <32 x i1> undef, ptr undef, align 64
; ZMM (512-bit) vectors
- store <512 x i1> undef, <512 x i1>* undef, align 64
- store <64 x i8> undef, <64 x i8>* undef, align 64
- store <32 x i16> undef, <32 x i16>* undef, align 64
- store <16 x i32> undef, <16 x i32>* undef, align 64
- store <8 x i64> undef, <8 x i64>* undef, align 64
- store <4 x i128> undef, <4 x i128>* undef, align 64
- store <2 x i256> undef, <2 x i256>* undef, align 64
- store <1 x i512> undef, <1 x i512>* undef, align 64
+ store <512 x i1> undef, ptr undef, align 64
+ store <64 x i8> undef, ptr undef, align 64
+ store <32 x i16> undef, ptr undef, align 64
+ store <16 x i32> undef, ptr undef, align 64
+ store <8 x i64> undef, ptr undef, align 64
+ store <4 x i128> undef, ptr undef, align 64
+ store <2 x i256> undef, ptr undef, align 64
+ store <1 x i512> undef, ptr undef, align 64
- store <16 x float> undef, <16 x float>* undef, align 64
- store <8 x double> undef, <8 x double>* undef, align 64
+ store <16 x float> undef, ptr undef, align 64
+ store <8 x double> undef, ptr undef, align 64
- store <8 x i8*> undef, <8 x i8*>* undef, align 64
+ store <8 x ptr> undef, ptr undef, align 64
- store <64 x i1> undef, <64 x i1>* undef, align 64
+ store <64 x i1> undef, ptr undef, align 64
ret i32 undef
}
define i32 @stores_partial_align64(i32 %arg) {
; Partial vectors with i64 elements (doubles as pointer-sized tests))
; SSE2-LABEL: 'stores_partial_align64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, <5 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, <7 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, <3 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, <5 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, <6 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, <7 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i1> undef, <9 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i1> undef, <10 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i1> undef, <11 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i1> undef, <12 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i1> undef, <13 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i1> undef, <14 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <15 x i1> undef, <15 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, <17 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, <18 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, <19 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, <20 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, <21 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, <22 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, <23 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, <24 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, <25 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, <26 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, <27 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, <28 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, <29 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, <30 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, <31 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, <33 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, <34 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, <35 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, <36 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, <37 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, <38 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, <39 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, <40 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, <41 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, <42 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, <43 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, <44 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, <45 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, <46 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, <47 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, <48 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, <49 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, <50 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, <51 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, <52 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, <53 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, <54 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, <55 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, <56 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, <57 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, <58 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, <59 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, <60 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, <61 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, <62 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, <63 x i1>* undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <3 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <7 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <13 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <14 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <15 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <29 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <31 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <41 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <43 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <44 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <45 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <46 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <47 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <61 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: store <63 x i8> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <15 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, ptr undef, align 64
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'stores_partial_align64'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, <3 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, <5 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, <6 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, <7 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, <3 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, <5 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, <6 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, <7 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, <9 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, <10 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, <11 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, <12 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, <13 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, <14 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, <15 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, <17 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, <18 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, <19 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, <20 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, <21 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, <22 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, <23 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, <24 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i1> undef, <25 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i1> undef, <26 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <27 x i1> undef, <27 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i1> undef, <28 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <29 x i1> undef, <29 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <30 x i1> undef, <30 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <31 x i1> undef, <31 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, <33 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, <34 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, <35 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, <36 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, <37 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, <38 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, <39 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, <40 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, <41 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, <42 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, <43 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, <44 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, <45 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, <46 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, <47 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, <48 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, <49 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, <50 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, <51 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, <52 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, <53 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, <54 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, <55 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, <56 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <57 x i1> undef, <57 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <58 x i1> undef, <58 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <59 x i1> undef, <59 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <60 x i1> undef, <60 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <61 x i1> undef, <61 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <62 x i1> undef, <62 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <63 x i1> undef, <63 x i1>* undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x i64> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <3 x double> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <19 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <19 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <21 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <27 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <35 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <37 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <38 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <39 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <51 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <53 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <54 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <55 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <19 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <21 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <22 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <23 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <25 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <26 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <27 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <29 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <30 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <31 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <49 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <50 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <51 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <52 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <53 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <54 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <55 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <56 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <57 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <58 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <59 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <60 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <61 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <62 x i1> undef, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <63 x i1> undef, ptr undef, align 64
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'stores_partial_align64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x double> undef, <5 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x double> undef, <6 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, <7 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, <5 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x float> undef, <6 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, <7 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x float> undef, <9 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x float> undef, <10 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x float> undef, <11 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x float> undef, <12 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, <13 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, <14 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, <15 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, <3 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, <5 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, <6 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, <7 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, <9 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, <10 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, <11 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, <12 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, <13 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, <14 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, <15 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, <17 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, <18 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, <19 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, <20 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, <21 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, <22 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, <23 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, <24 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, <25 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, <26 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, <27 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, <28 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, <29 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, <30 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, <31 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, <33 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, <34 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, <35 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, <36 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, <37 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, <38 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, <39 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, <40 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, <41 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, <42 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, <43 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, <44 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, <45 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, <46 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, <47 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, <48 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i1> undef, <49 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i1> undef, <50 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <51 x i1> undef, <51 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i1> undef, <52 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <53 x i1> undef, <53 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <54 x i1> undef, <54 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <55 x i1> undef, <55 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i1> undef, <56 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, <57 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, <58 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, <59 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, <60 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, <61 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, <62 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, <63 x i1>* undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x i64> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i64> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x i64> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <5 x double> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x double> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <7 x double> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x i32> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <9 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <10 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <11 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <13 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <14 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <15 x float> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <17 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <18 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <19 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <20 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <21 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <22 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <23 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <28 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <29 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <30 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <31 x i16> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <35 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <37 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <38 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <39 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <43 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <45 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <46 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <47 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <50 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <51 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <52 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <53 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <54 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <55 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <59 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <61 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <62 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <63 x i8> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <33 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <34 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <35 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <36 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <37 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <38 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <39 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <40 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <41 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <42 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <43 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <44 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <45 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <46 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <47 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <48 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <49 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <50 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <51 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <52 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <53 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <54 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <55 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <56 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <57 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <58 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <59 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <60 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <61 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <62 x i1> undef, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <63 x i1> undef, ptr undef, align 64
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'stores_partial_align64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, <1 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, <3 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, <5 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, <6 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, <7 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, <1 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, <3 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, <5 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, <6 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x double> undef, <7 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, <1 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, <3 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, <5 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i32> undef, <6 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, <7 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, <9 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, <10 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, <11 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, <12 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <13 x i32> undef, <13 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i32> undef, <14 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, <15 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, <1 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, <2 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, <3 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, <5 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x float> undef, <6 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, <7 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, <9 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, <10 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, <11 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, <12 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x float> undef, <13 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x float> undef, <14 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x float> undef, <15 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, <1 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, <3 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, <4 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, <5 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, <6 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, <7 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, <9 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, <10 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, <11 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, <12 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, <13 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, <14 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, <15 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, <17 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i16> undef, <18 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, <19 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, <20 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, <21 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, <22 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, <23 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, <24 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, <25 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <26 x i16> undef, <26 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <27 x i16> undef, <27 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i16> undef, <28 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, <29 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, <30 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, <31 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, <1 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, <3 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, <4 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, <5 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, <6 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, <7 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, <8 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, <9 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, <10 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, <11 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, <12 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, <13 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, <14 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, <15 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, <17 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, <18 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, <19 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, <20 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, <21 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, <22 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, <23 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, <24 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, <25 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, <26 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, <27 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, <28 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, <29 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, <30 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, <31 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, <33 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, <34 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, <35 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <36 x i8> undef, <36 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, <37 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, <38 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, <39 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, <40 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, <41 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, <42 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, <43 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, <44 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, <45 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, <46 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, <47 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, <48 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <49 x i8> undef, <49 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <50 x i8> undef, <50 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <51 x i8> undef, <51 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <52 x i8> undef, <52 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, <53 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <54 x i8> undef, <54 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <55 x i8> undef, <55 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <56 x i8> undef, <56 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, <57 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, <58 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, <59 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, <60 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, <61 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, <62 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <63 x i8> undef, <63 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, <3 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, <5 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, <6 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, <7 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, <9 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, <10 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, <11 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, <12 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, <13 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, <14 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, <15 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, <17 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, <18 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, <19 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, <20 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, <21 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, <22 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, <23 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, <24 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, <25 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, <26 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, <27 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, <28 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, <29 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, <30 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, <31 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, <33 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, <34 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, <35 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, <36 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, <37 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, <38 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, <39 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, <40 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, <41 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, <42 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, <43 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, <44 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, <45 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, <46 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, <47 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, <48 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, <49 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, <50 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, <51 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, <52 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, <53 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, <54 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, <55 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, <56 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i1> undef, <57 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <58 x i1> undef, <58 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <59 x i1> undef, <59 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i1> undef, <60 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <61 x i1> undef, <61 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <62 x i1> undef, <62 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <63 x i1> undef, <63 x i1>* undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i64> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i64> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i64> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i64> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i64> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x double> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x double> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x double> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x double> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x double> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <5 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <13 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i32> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x float> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <9 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <10 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <11 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <21 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <22 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <23 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <25 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <26 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <27 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i16> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <1 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <3 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <5 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <6 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <7 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <11 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <13 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <14 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <15 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <17 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <18 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <19 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <20 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <21 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <22 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <23 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <27 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <29 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <30 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <31 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <33 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <34 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <35 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <36 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <37 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <38 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <39 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <43 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <45 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <46 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <47 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <49 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <50 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <51 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <52 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <53 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <54 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <55 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <56 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <58 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <59 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <61 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: store <62 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: store <63 x i8> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <3 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <5 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <7 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <9 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <10 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <11 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <12 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <13 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <14 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <15 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <17 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <18 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <19 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <20 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <21 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <22 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <23 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <24 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <25 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <26 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <27 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <28 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <29 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <30 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <31 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <33 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <34 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <35 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <36 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <37 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <38 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <39 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <40 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <41 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <42 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <43 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <44 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <45 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <46 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <47 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <48 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <49 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <50 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <51 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <52 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <53 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <54 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <55 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: store <56 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <57 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <58 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <59 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <60 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <61 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <62 x i1> undef, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: store <63 x i1> undef, ptr undef, align 64
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- store <1 x i64> undef, <1 x i64>* undef, align 64
+ store <1 x i64> undef, ptr undef, align 64
; <2 x i64> is XMM
- store <3 x i64> undef, <3 x i64>* undef, align 64
+ store <3 x i64> undef, ptr undef, align 64
; <4 x i64> is YMM
- store <5 x i64> undef, <5 x i64>* undef, align 64
- store <6 x i64> undef, <6 x i64>* undef, align 64
- store <7 x i64> undef, <7 x i64>* undef, align 64
+ store <5 x i64> undef, ptr undef, align 64
+ store <6 x i64> undef, ptr undef, align 64
+ store <7 x i64> undef, ptr undef, align 64
; <8 x i64> is ZMM
; Partial vectors with double elements
- store <1 x double> undef, <1 x double>* undef, align 64
+ store <1 x double> undef, ptr undef, align 64
; <2 x double> is XMM
- store <3 x double> undef, <3 x double>* undef, align 64
+ store <3 x double> undef, ptr undef, align 64
; <4 x double> is YMM
- store <5 x double> undef, <5 x double>* undef, align 64
- store <6 x double> undef, <6 x double>* undef, align 64
- store <7 x double> undef, <7 x double>* undef, align 64
+ store <5 x double> undef, ptr undef, align 64
+ store <6 x double> undef, ptr undef, align 64
+ store <7 x double> undef, ptr undef, align 64
; <8 x double> is ZMM
; Partial vectors with i32 elements
- store <1 x i32> undef, <1 x i32>* undef, align 64
- store <2 x i32> undef, <2 x i32>* undef, align 64
- store <3 x i32> undef, <3 x i32>* undef, align 64
+ store <1 x i32> undef, ptr undef, align 64
+ store <2 x i32> undef, ptr undef, align 64
+ store <3 x i32> undef, ptr undef, align 64
; <4 x i32> is XMM
- store <5 x i32> undef, <5 x i32>* undef, align 64
- store <6 x i32> undef, <6 x i32>* undef, align 64
- store <7 x i32> undef, <7 x i32>* undef, align 64
+ store <5 x i32> undef, ptr undef, align 64
+ store <6 x i32> undef, ptr undef, align 64
+ store <7 x i32> undef, ptr undef, align 64
; <8 x i32> is YMM
- store <9 x i32> undef, <9 x i32>* undef, align 64
- store <10 x i32> undef, <10 x i32>* undef, align 64
- store <11 x i32> undef, <11 x i32>* undef, align 64
- store <12 x i32> undef, <12 x i32>* undef, align 64
- store <13 x i32> undef, <13 x i32>* undef, align 64
- store <14 x i32> undef, <14 x i32>* undef, align 64
- store <15 x i32> undef, <15 x i32>* undef, align 64
+ store <9 x i32> undef, ptr undef, align 64
+ store <10 x i32> undef, ptr undef, align 64
+ store <11 x i32> undef, ptr undef, align 64
+ store <12 x i32> undef, ptr undef, align 64
+ store <13 x i32> undef, ptr undef, align 64
+ store <14 x i32> undef, ptr undef, align 64
+ store <15 x i32> undef, ptr undef, align 64
; <16 x i32> is ZMM
; Partial vectors with float elements
- store <1 x float> undef, <1 x float>* undef, align 64
- store <2 x float> undef, <2 x float>* undef, align 64
- store <3 x float> undef, <3 x float>* undef, align 64
+ store <1 x float> undef, ptr undef, align 64
+ store <2 x float> undef, ptr undef, align 64
+ store <3 x float> undef, ptr undef, align 64
; <4 x float> is XMM
- store <5 x float> undef, <5 x float>* undef, align 64
- store <6 x float> undef, <6 x float>* undef, align 64
- store <7 x float> undef, <7 x float>* undef, align 64
+ store <5 x float> undef, ptr undef, align 64
+ store <6 x float> undef, ptr undef, align 64
+ store <7 x float> undef, ptr undef, align 64
; <8 x float> is YMM
- store <9 x float> undef, <9 x float>* undef, align 64
- store <10 x float> undef, <10 x float>* undef, align 64
- store <11 x float> undef, <11 x float>* undef, align 64
- store <12 x float> undef, <12 x float>* undef, align 64
- store <13 x float> undef, <13 x float>* undef, align 64
- store <14 x float> undef, <14 x float>* undef, align 64
- store <15 x float> undef, <15 x float>* undef, align 64
+ store <9 x float> undef, ptr undef, align 64
+ store <10 x float> undef, ptr undef, align 64
+ store <11 x float> undef, ptr undef, align 64
+ store <12 x float> undef, ptr undef, align 64
+ store <13 x float> undef, ptr undef, align 64
+ store <14 x float> undef, ptr undef, align 64
+ store <15 x float> undef, ptr undef, align 64
; <16 x float> is ZMM
; Partial vectors with i16 elements
- store <1 x i16> undef, <1 x i16>* undef, align 64
- store <2 x i16> undef, <2 x i16>* undef, align 64
- store <3 x i16> undef, <3 x i16>* undef, align 64
- store <4 x i16> undef, <4 x i16>* undef, align 64
- store <5 x i16> undef, <5 x i16>* undef, align 64
- store <6 x i16> undef, <6 x i16>* undef, align 64
- store <7 x i16> undef, <7 x i16>* undef, align 64
+ store <1 x i16> undef, ptr undef, align 64
+ store <2 x i16> undef, ptr undef, align 64
+ store <3 x i16> undef, ptr undef, align 64
+ store <4 x i16> undef, ptr undef, align 64
+ store <5 x i16> undef, ptr undef, align 64
+ store <6 x i16> undef, ptr undef, align 64
+ store <7 x i16> undef, ptr undef, align 64
; <8 x i16> is XMM
- store <9 x i16> undef, <9 x i16>* undef, align 64
- store <10 x i16> undef, <10 x i16>* undef, align 64
- store <11 x i16> undef, <11 x i16>* undef, align 64
- store <12 x i16> undef, <12 x i16>* undef, align 64
- store <13 x i16> undef, <13 x i16>* undef, align 64
- store <14 x i16> undef, <14 x i16>* undef, align 64
- store <15 x i16> undef, <15 x i16>* undef, align 64
+ store <9 x i16> undef, ptr undef, align 64
+ store <10 x i16> undef, ptr undef, align 64
+ store <11 x i16> undef, ptr undef, align 64
+ store <12 x i16> undef, ptr undef, align 64
+ store <13 x i16> undef, ptr undef, align 64
+ store <14 x i16> undef, ptr undef, align 64
+ store <15 x i16> undef, ptr undef, align 64
; <16 x i16> is YMM
- store <17 x i16> undef, <17 x i16>* undef, align 64
- store <18 x i16> undef, <18 x i16>* undef, align 64
- store <19 x i16> undef, <19 x i16>* undef, align 64
- store <20 x i16> undef, <20 x i16>* undef, align 64
- store <21 x i16> undef, <21 x i16>* undef, align 64
- store <22 x i16> undef, <22 x i16>* undef, align 64
- store <23 x i16> undef, <23 x i16>* undef, align 64
- store <24 x i16> undef, <24 x i16>* undef, align 64
- store <25 x i16> undef, <25 x i16>* undef, align 64
- store <26 x i16> undef, <26 x i16>* undef, align 64
- store <27 x i16> undef, <27 x i16>* undef, align 64
- store <28 x i16> undef, <28 x i16>* undef, align 64
- store <29 x i16> undef, <29 x i16>* undef, align 64
- store <30 x i16> undef, <30 x i16>* undef, align 64
- store <31 x i16> undef, <31 x i16>* undef, align 64
+ store <17 x i16> undef, ptr undef, align 64
+ store <18 x i16> undef, ptr undef, align 64
+ store <19 x i16> undef, ptr undef, align 64
+ store <20 x i16> undef, ptr undef, align 64
+ store <21 x i16> undef, ptr undef, align 64
+ store <22 x i16> undef, ptr undef, align 64
+ store <23 x i16> undef, ptr undef, align 64
+ store <24 x i16> undef, ptr undef, align 64
+ store <25 x i16> undef, ptr undef, align 64
+ store <26 x i16> undef, ptr undef, align 64
+ store <27 x i16> undef, ptr undef, align 64
+ store <28 x i16> undef, ptr undef, align 64
+ store <29 x i16> undef, ptr undef, align 64
+ store <30 x i16> undef, ptr undef, align 64
+ store <31 x i16> undef, ptr undef, align 64
; <32 x i16> is ZMM
; Partial vectors with i8 elements
- store <1 x i8> undef, <1 x i8>* undef, align 64
- store <2 x i8> undef, <2 x i8>* undef, align 64
- store <3 x i8> undef, <3 x i8>* undef, align 64
- store <4 x i8> undef, <4 x i8>* undef, align 64
- store <5 x i8> undef, <5 x i8>* undef, align 64
- store <6 x i8> undef, <6 x i8>* undef, align 64
- store <7 x i8> undef, <7 x i8>* undef, align 64
- store <8 x i8> undef, <8 x i8>* undef, align 64
- store <9 x i8> undef, <9 x i8>* undef, align 64
- store <10 x i8> undef, <10 x i8>* undef, align 64
- store <11 x i8> undef, <11 x i8>* undef, align 64
- store <12 x i8> undef, <12 x i8>* undef, align 64
- store <13 x i8> undef, <13 x i8>* undef, align 64
- store <14 x i8> undef, <14 x i8>* undef, align 64
- store <15 x i8> undef, <15 x i8>* undef, align 64
+ store <1 x i8> undef, ptr undef, align 64
+ store <2 x i8> undef, ptr undef, align 64
+ store <3 x i8> undef, ptr undef, align 64
+ store <4 x i8> undef, ptr undef, align 64
+ store <5 x i8> undef, ptr undef, align 64
+ store <6 x i8> undef, ptr undef, align 64
+ store <7 x i8> undef, ptr undef, align 64
+ store <8 x i8> undef, ptr undef, align 64
+ store <9 x i8> undef, ptr undef, align 64
+ store <10 x i8> undef, ptr undef, align 64
+ store <11 x i8> undef, ptr undef, align 64
+ store <12 x i8> undef, ptr undef, align 64
+ store <13 x i8> undef, ptr undef, align 64
+ store <14 x i8> undef, ptr undef, align 64
+ store <15 x i8> undef, ptr undef, align 64
; <16 x i8> is XMM
- store <17 x i8> undef, <17 x i8>* undef, align 64
- store <18 x i8> undef, <18 x i8>* undef, align 64
- store <19 x i8> undef, <19 x i8>* undef, align 64
- store <20 x i8> undef, <20 x i8>* undef, align 64
- store <21 x i8> undef, <21 x i8>* undef, align 64
- store <22 x i8> undef, <22 x i8>* undef, align 64
- store <23 x i8> undef, <23 x i8>* undef, align 64
- store <24 x i8> undef, <24 x i8>* undef, align 64
- store <25 x i8> undef, <25 x i8>* undef, align 64
- store <26 x i8> undef, <26 x i8>* undef, align 64
- store <27 x i8> undef, <27 x i8>* undef, align 64
- store <28 x i8> undef, <28 x i8>* undef, align 64
- store <29 x i8> undef, <29 x i8>* undef, align 64
- store <30 x i8> undef, <30 x i8>* undef, align 64
- store <31 x i8> undef, <31 x i8>* undef, align 64
+ store <17 x i8> undef, ptr undef, align 64
+ store <18 x i8> undef, ptr undef, align 64
+ store <19 x i8> undef, ptr undef, align 64
+ store <20 x i8> undef, ptr undef, align 64
+ store <21 x i8> undef, ptr undef, align 64
+ store <22 x i8> undef, ptr undef, align 64
+ store <23 x i8> undef, ptr undef, align 64
+ store <24 x i8> undef, ptr undef, align 64
+ store <25 x i8> undef, ptr undef, align 64
+ store <26 x i8> undef, ptr undef, align 64
+ store <27 x i8> undef, ptr undef, align 64
+ store <28 x i8> undef, ptr undef, align 64
+ store <29 x i8> undef, ptr undef, align 64
+ store <30 x i8> undef, ptr undef, align 64
+ store <31 x i8> undef, ptr undef, align 64
; <32 x i8> is YMM
- store <33 x i8> undef, <33 x i8>* undef, align 64
- store <34 x i8> undef, <34 x i8>* undef, align 64
- store <35 x i8> undef, <35 x i8>* undef, align 64
- store <36 x i8> undef, <36 x i8>* undef, align 64
- store <37 x i8> undef, <37 x i8>* undef, align 64
- store <38 x i8> undef, <38 x i8>* undef, align 64
- store <39 x i8> undef, <39 x i8>* undef, align 64
- store <40 x i8> undef, <40 x i8>* undef, align 64
- store <41 x i8> undef, <41 x i8>* undef, align 64
- store <42 x i8> undef, <42 x i8>* undef, align 64
- store <43 x i8> undef, <43 x i8>* undef, align 64
- store <44 x i8> undef, <44 x i8>* undef, align 64
- store <45 x i8> undef, <45 x i8>* undef, align 64
- store <46 x i8> undef, <46 x i8>* undef, align 64
- store <47 x i8> undef, <47 x i8>* undef, align 64
- store <48 x i8> undef, <48 x i8>* undef, align 64
- store <49 x i8> undef, <49 x i8>* undef, align 64
- store <50 x i8> undef, <50 x i8>* undef, align 64
- store <51 x i8> undef, <51 x i8>* undef, align 64
- store <52 x i8> undef, <52 x i8>* undef, align 64
- store <53 x i8> undef, <53 x i8>* undef, align 64
- store <54 x i8> undef, <54 x i8>* undef, align 64
- store <55 x i8> undef, <55 x i8>* undef, align 64
- store <56 x i8> undef, <56 x i8>* undef, align 64
- store <57 x i8> undef, <57 x i8>* undef, align 64
- store <58 x i8> undef, <58 x i8>* undef, align 64
- store <59 x i8> undef, <59 x i8>* undef, align 64
- store <60 x i8> undef, <60 x i8>* undef, align 64
- store <61 x i8> undef, <61 x i8>* undef, align 64
- store <62 x i8> undef, <62 x i8>* undef, align 64
- store <63 x i8> undef, <63 x i8>* undef, align 64
+ store <33 x i8> undef, ptr undef, align 64
+ store <34 x i8> undef, ptr undef, align 64
+ store <35 x i8> undef, ptr undef, align 64
+ store <36 x i8> undef, ptr undef, align 64
+ store <37 x i8> undef, ptr undef, align 64
+ store <38 x i8> undef, ptr undef, align 64
+ store <39 x i8> undef, ptr undef, align 64
+ store <40 x i8> undef, ptr undef, align 64
+ store <41 x i8> undef, ptr undef, align 64
+ store <42 x i8> undef, ptr undef, align 64
+ store <43 x i8> undef, ptr undef, align 64
+ store <44 x i8> undef, ptr undef, align 64
+ store <45 x i8> undef, ptr undef, align 64
+ store <46 x i8> undef, ptr undef, align 64
+ store <47 x i8> undef, ptr undef, align 64
+ store <48 x i8> undef, ptr undef, align 64
+ store <49 x i8> undef, ptr undef, align 64
+ store <50 x i8> undef, ptr undef, align 64
+ store <51 x i8> undef, ptr undef, align 64
+ store <52 x i8> undef, ptr undef, align 64
+ store <53 x i8> undef, ptr undef, align 64
+ store <54 x i8> undef, ptr undef, align 64
+ store <55 x i8> undef, ptr undef, align 64
+ store <56 x i8> undef, ptr undef, align 64
+ store <57 x i8> undef, ptr undef, align 64
+ store <58 x i8> undef, ptr undef, align 64
+ store <59 x i8> undef, ptr undef, align 64
+ store <60 x i8> undef, ptr undef, align 64
+ store <61 x i8> undef, ptr undef, align 64
+ store <62 x i8> undef, ptr undef, align 64
+ store <63 x i8> undef, ptr undef, align 64
; <64 x i8> is ZMM
; Partial vectors with i1 elements
; <1 x i1> is XMM
; <2 x i1> is XMM
- store <3 x i1> undef, <3 x i1>* undef, align 64
+ store <3 x i1> undef, ptr undef, align 64
; <4 x i1> is XMM
- store <5 x i1> undef, <5 x i1>* undef, align 64
- store <6 x i1> undef, <6 x i1>* undef, align 64
- store <7 x i1> undef, <7 x i1>* undef, align 64
+ store <5 x i1> undef, ptr undef, align 64
+ store <6 x i1> undef, ptr undef, align 64
+ store <7 x i1> undef, ptr undef, align 64
; <8 x i1> is XMM
- store <9 x i1> undef, <9 x i1>* undef, align 64
- store <10 x i1> undef, <10 x i1>* undef, align 64
- store <11 x i1> undef, <11 x i1>* undef, align 64
- store <12 x i1> undef, <12 x i1>* undef, align 64
- store <13 x i1> undef, <13 x i1>* undef, align 64
- store <14 x i1> undef, <14 x i1>* undef, align 64
- store <15 x i1> undef, <15 x i1>* undef, align 64
+ store <9 x i1> undef, ptr undef, align 64
+ store <10 x i1> undef, ptr undef, align 64
+ store <11 x i1> undef, ptr undef, align 64
+ store <12 x i1> undef, ptr undef, align 64
+ store <13 x i1> undef, ptr undef, align 64
+ store <14 x i1> undef, ptr undef, align 64
+ store <15 x i1> undef, ptr undef, align 64
; <16 x i1> is XMM
- store <17 x i1> undef, <17 x i1>* undef, align 64
- store <18 x i1> undef, <18 x i1>* undef, align 64
- store <19 x i1> undef, <19 x i1>* undef, align 64
- store <20 x i1> undef, <20 x i1>* undef, align 64
- store <21 x i1> undef, <21 x i1>* undef, align 64
- store <22 x i1> undef, <22 x i1>* undef, align 64
- store <23 x i1> undef, <23 x i1>* undef, align 64
- store <24 x i1> undef, <24 x i1>* undef, align 64
- store <25 x i1> undef, <25 x i1>* undef, align 64
- store <26 x i1> undef, <26 x i1>* undef, align 64
- store <27 x i1> undef, <27 x i1>* undef, align 64
- store <28 x i1> undef, <28 x i1>* undef, align 64
- store <29 x i1> undef, <29 x i1>* undef, align 64
- store <30 x i1> undef, <30 x i1>* undef, align 64
- store <31 x i1> undef, <31 x i1>* undef, align 64
+ store <17 x i1> undef, ptr undef, align 64
+ store <18 x i1> undef, ptr undef, align 64
+ store <19 x i1> undef, ptr undef, align 64
+ store <20 x i1> undef, ptr undef, align 64
+ store <21 x i1> undef, ptr undef, align 64
+ store <22 x i1> undef, ptr undef, align 64
+ store <23 x i1> undef, ptr undef, align 64
+ store <24 x i1> undef, ptr undef, align 64
+ store <25 x i1> undef, ptr undef, align 64
+ store <26 x i1> undef, ptr undef, align 64
+ store <27 x i1> undef, ptr undef, align 64
+ store <28 x i1> undef, ptr undef, align 64
+ store <29 x i1> undef, ptr undef, align 64
+ store <30 x i1> undef, ptr undef, align 64
+ store <31 x i1> undef, ptr undef, align 64
; <32 x i1> is YMM
- store <33 x i1> undef, <33 x i1>* undef, align 64
- store <34 x i1> undef, <34 x i1>* undef, align 64
- store <35 x i1> undef, <35 x i1>* undef, align 64
- store <36 x i1> undef, <36 x i1>* undef, align 64
- store <37 x i1> undef, <37 x i1>* undef, align 64
- store <38 x i1> undef, <38 x i1>* undef, align 64
- store <39 x i1> undef, <39 x i1>* undef, align 64
- store <40 x i1> undef, <40 x i1>* undef, align 64
- store <41 x i1> undef, <41 x i1>* undef, align 64
- store <42 x i1> undef, <42 x i1>* undef, align 64
- store <43 x i1> undef, <43 x i1>* undef, align 64
- store <44 x i1> undef, <44 x i1>* undef, align 64
- store <45 x i1> undef, <45 x i1>* undef, align 64
- store <46 x i1> undef, <46 x i1>* undef, align 64
- store <47 x i1> undef, <47 x i1>* undef, align 64
- store <48 x i1> undef, <48 x i1>* undef, align 64
- store <49 x i1> undef, <49 x i1>* undef, align 64
- store <50 x i1> undef, <50 x i1>* undef, align 64
- store <51 x i1> undef, <51 x i1>* undef, align 64
- store <52 x i1> undef, <52 x i1>* undef, align 64
- store <53 x i1> undef, <53 x i1>* undef, align 64
- store <54 x i1> undef, <54 x i1>* undef, align 64
- store <55 x i1> undef, <55 x i1>* undef, align 64
- store <56 x i1> undef, <56 x i1>* undef, align 64
- store <57 x i1> undef, <57 x i1>* undef, align 64
- store <58 x i1> undef, <58 x i1>* undef, align 64
- store <59 x i1> undef, <59 x i1>* undef, align 64
- store <60 x i1> undef, <60 x i1>* undef, align 64
- store <61 x i1> undef, <61 x i1>* undef, align 64
- store <62 x i1> undef, <62 x i1>* undef, align 64
- store <63 x i1> undef, <63 x i1>* undef, align 64
+ store <33 x i1> undef, ptr undef, align 64
+ store <34 x i1> undef, ptr undef, align 64
+ store <35 x i1> undef, ptr undef, align 64
+ store <36 x i1> undef, ptr undef, align 64
+ store <37 x i1> undef, ptr undef, align 64
+ store <38 x i1> undef, ptr undef, align 64
+ store <39 x i1> undef, ptr undef, align 64
+ store <40 x i1> undef, ptr undef, align 64
+ store <41 x i1> undef, ptr undef, align 64
+ store <42 x i1> undef, ptr undef, align 64
+ store <43 x i1> undef, ptr undef, align 64
+ store <44 x i1> undef, ptr undef, align 64
+ store <45 x i1> undef, ptr undef, align 64
+ store <46 x i1> undef, ptr undef, align 64
+ store <47 x i1> undef, ptr undef, align 64
+ store <48 x i1> undef, ptr undef, align 64
+ store <49 x i1> undef, ptr undef, align 64
+ store <50 x i1> undef, ptr undef, align 64
+ store <51 x i1> undef, ptr undef, align 64
+ store <52 x i1> undef, ptr undef, align 64
+ store <53 x i1> undef, ptr undef, align 64
+ store <54 x i1> undef, ptr undef, align 64
+ store <55 x i1> undef, ptr undef, align 64
+ store <56 x i1> undef, ptr undef, align 64
+ store <57 x i1> undef, ptr undef, align 64
+ store <58 x i1> undef, ptr undef, align 64
+ store <59 x i1> undef, ptr undef, align 64
+ store <60 x i1> undef, ptr undef, align 64
+ store <61 x i1> undef, ptr undef, align 64
+ store <62 x i1> undef, ptr undef, align 64
+ store <63 x i1> undef, ptr undef, align 64
; <64 x i1> is ZMM
ret i32 undef
define i32 @loads_align4(i32 %arg) {
; Scalars
; SSE2-LABEL: 'loads_align4'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load i1*, i1** undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, <128 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, <16 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, <8 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, <4 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, <2 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, <1 x i128>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, <2 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x i8*>, <2 x i8*>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, <16 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, <8 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, <4 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, <2 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, <1 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, <256 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, <32 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <16 x i16>, <16 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <8 x i32>, <8 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <4 x i64>, <4 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, <2 x i128>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, <1 x i256>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <8 x float>, <8 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <4 x double>, <4 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <4 x i8*>, <4 x i8*>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, <32 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load <512 x i1>, <512 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = load <64 x i8>, <64 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = load <32 x i16>, <32 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %40 = load <16 x i32>, <16 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %41 = load <8 x i64>, <8 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, <4 x i128>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, <2 x i256>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, <1 x i512>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <16 x float>, <16 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <8 x double>, <8 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %47 = load <8 x i8*>, <8 x i8*>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, <64 x i1>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load ptr, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load ptr, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <16 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <8 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <4 x i64>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <8 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <4 x double>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <4 x ptr>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load <512 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = load <64 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = load <32 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %40 = load <16 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %41 = load <8 x i64>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <16 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <8 x double>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %47 = load <8 x ptr>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'loads_align4'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load i1*, i1** undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, <128 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, <16 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, <8 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, <4 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, <2 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, <1 x i128>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, <2 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x i8*>, <2 x i8*>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, <16 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, <8 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, <4 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, <2 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, <1 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, <256 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, <32 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <16 x i16>, <16 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <8 x i32>, <8 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <4 x i64>, <4 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, <2 x i128>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, <1 x i256>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <8 x float>, <8 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <4 x double>, <4 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <4 x i8*>, <4 x i8*>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, <32 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load <512 x i1>, <512 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = load <64 x i8>, <64 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = load <32 x i16>, <32 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %40 = load <16 x i32>, <16 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %41 = load <8 x i64>, <8 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, <4 x i128>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, <2 x i256>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, <1 x i512>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <16 x float>, <16 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <8 x double>, <8 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %47 = load <8 x i8*>, <8 x i8*>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, <64 x i1>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load ptr, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load ptr, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <16 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <8 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <4 x i64>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <8 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <4 x double>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <4 x ptr>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load <512 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = load <64 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = load <32 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %40 = load <16 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %41 = load <8 x i64>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <16 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <8 x double>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %47 = load <8 x ptr>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'loads_align4'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load i1*, i1** undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, <128 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, <16 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, <8 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, <4 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, <2 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, <1 x i128>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, <2 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x i8*>, <2 x i8*>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, <16 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, <8 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, <4 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, <2 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, <1 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <256 x i1>, <256 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, <32 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, <16 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <8 x i32>, <8 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <4 x i64>, <4 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, <2 x i128>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, <1 x i256>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <8 x float>, <8 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <4 x double>, <4 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <4 x i8*>, <4 x i8*>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, <32 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %37 = load <512 x i1>, <512 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %38 = load <64 x i8>, <64 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = load <32 x i16>, <32 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %40 = load <16 x i32>, <16 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %41 = load <8 x i64>, <8 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, <4 x i128>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, <2 x i256>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, <1 x i512>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <16 x float>, <16 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %46 = load <8 x double>, <8 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <8 x i8*>, <8 x i8*>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, <64 x i1>* undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load ptr, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load ptr, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <256 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <8 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <4 x i64>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <8 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <4 x double>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <4 x ptr>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %37 = load <512 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %38 = load <64 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = load <32 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %40 = load <16 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %41 = load <8 x i64>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <16 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %46 = load <8 x double>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <8 x ptr>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, ptr undef, align 4
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'loads_align4'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load i1*, i1** undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = load <128 x i1>, <128 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, <16 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, <8 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, <4 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, <2 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, <1 x i128>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, <2 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x i8*>, <2 x i8*>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, <16 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, <8 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, <4 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, <2 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, <1 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = load <256 x i1>, <256 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, <32 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, <16 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <8 x i32>, <8 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <4 x i64>, <4 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, <2 x i128>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, <1 x i256>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <8 x float>, <8 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <4 x double>, <4 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <4 x i8*>, <4 x i8*>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, <32 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %37 = load <512 x i1>, <512 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <64 x i8>, <64 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <32 x i16>, <32 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <16 x i32>, <16 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <8 x i64>, <8 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, <4 x i128>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, <2 x i256>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, <1 x i512>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = load <16 x float>, <16 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = load <8 x double>, <8 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %47 = load <8 x i8*>, <8 x i8*>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, <64 x i1>* undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load ptr, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load ptr, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = load <128 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = load <256 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <8 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <4 x i64>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <8 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <4 x double>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <4 x ptr>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %37 = load <512 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <64 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <32 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <16 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <8 x i64>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = load <16 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = load <8 x double>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %47 = load <8 x ptr>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, ptr undef, align 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- load i8, i8* undef, align 4
- load i16, i16* undef, align 4
- load i32, i32* undef, align 4
- load i64, i64* undef, align 4
- load i128, i128* undef, align 4
- load i256, i256* undef, align 4
- load i512, i512* undef, align 4
+ load i8, ptr undef, align 4
+ load i16, ptr undef, align 4
+ load i32, ptr undef, align 4
+ load i64, ptr undef, align 4
+ load i128, ptr undef, align 4
+ load i256, ptr undef, align 4
+ load i512, ptr undef, align 4
- load float, float* undef, align 4
- load double, double* undef, align 4
+ load float, ptr undef, align 4
+ load double, ptr undef, align 4
- load i8*, i8** undef, align 4
+ load ptr, ptr undef, align 4
- load i1*, i1** undef, align 4
+ load ptr, ptr undef, align 4
; XMM (128-bit) vectors
- load <128 x i1>, <128 x i1>* undef, align 4
- load <16 x i8>, <16 x i8>* undef, align 4
- load <8 x i16>, <8 x i16>* undef, align 4
- load <4 x i32>, <4 x i32>* undef, align 4
- load <2 x i64>, <2 x i64>* undef, align 4
- load <1 x i128>, <1 x i128>* undef, align 4
+ load <128 x i1>, ptr undef, align 4
+ load <16 x i8>, ptr undef, align 4
+ load <8 x i16>, ptr undef, align 4
+ load <4 x i32>, ptr undef, align 4
+ load <2 x i64>, ptr undef, align 4
+ load <1 x i128>, ptr undef, align 4
- load <4 x float>, <4 x float>* undef, align 4
- load <2 x double>, <2 x double>* undef, align 4
+ load <4 x float>, ptr undef, align 4
+ load <2 x double>, ptr undef, align 4
- load <2 x i8*>, <2 x i8*>* undef, align 4
+ load <2 x ptr>, ptr undef, align 4
- load <16 x i1>, <16 x i1>* undef, align 4
- load <8 x i1>, <8 x i1>* undef, align 4
- load <4 x i1>, <4 x i1>* undef, align 4
- load <2 x i1>, <2 x i1>* undef, align 4
- load <1 x i1>, <1 x i1>* undef, align 4
+ load <16 x i1>, ptr undef, align 4
+ load <8 x i1>, ptr undef, align 4
+ load <4 x i1>, ptr undef, align 4
+ load <2 x i1>, ptr undef, align 4
+ load <1 x i1>, ptr undef, align 4
; YMM (256-bit) vectors
- load <256 x i1>, <256 x i1>* undef, align 4
- load <32 x i8>, <32 x i8>* undef, align 4
- load <16 x i16>, <16 x i16>* undef, align 4
- load <8 x i32>, <8 x i32>* undef, align 4
- load <4 x i64>, <4 x i64>* undef, align 4
- load <2 x i128>, <2 x i128>* undef, align 4
- load <1 x i256>, <1 x i256>* undef, align 4
+ load <256 x i1>, ptr undef, align 4
+ load <32 x i8>, ptr undef, align 4
+ load <16 x i16>, ptr undef, align 4
+ load <8 x i32>, ptr undef, align 4
+ load <4 x i64>, ptr undef, align 4
+ load <2 x i128>, ptr undef, align 4
+ load <1 x i256>, ptr undef, align 4
- load <8 x float>, <8 x float>* undef, align 4
- load <4 x double>, <4 x double>* undef, align 4
+ load <8 x float>, ptr undef, align 4
+ load <4 x double>, ptr undef, align 4
- load <4 x i8*>, <4 x i8*>* undef, align 4
+ load <4 x ptr>, ptr undef, align 4
- load <32 x i1>, <32 x i1>* undef, align 4
+ load <32 x i1>, ptr undef, align 4
; ZMM (512-bit) vectors
- load <512 x i1>, <512 x i1>* undef, align 4
- load <64 x i8>, <64 x i8>* undef, align 4
- load <32 x i16>, <32 x i16>* undef, align 4
- load <16 x i32>, <16 x i32>* undef, align 4
- load <8 x i64>, <8 x i64>* undef, align 4
- load <4 x i128>, <4 x i128>* undef, align 4
- load <2 x i256>, <2 x i256>* undef, align 4
- load <1 x i512>, <1 x i512>* undef, align 4
+ load <512 x i1>, ptr undef, align 4
+ load <64 x i8>, ptr undef, align 4
+ load <32 x i16>, ptr undef, align 4
+ load <16 x i32>, ptr undef, align 4
+ load <8 x i64>, ptr undef, align 4
+ load <4 x i128>, ptr undef, align 4
+ load <2 x i256>, ptr undef, align 4
+ load <1 x i512>, ptr undef, align 4
- load <16 x float>, <16 x float>* undef, align 4
- load <8 x double>, <8 x double>* undef, align 4
+ load <16 x float>, ptr undef, align 4
+ load <8 x double>, ptr undef, align 4
- load <8 x i8*>, <8 x i8*>* undef, align 4
+ load <8 x ptr>, ptr undef, align 4
- load <64 x i1>, <64 x i1>* undef, align 4
+ load <64 x i1>, ptr undef, align 4
ret i32 undef
}
define i32 @loads_partial_align4(i32 %arg) {
; Partial vectors with i64 elements (doubles as pointer-sized tests)
; SSE2-LABEL: 'loads_partial_align4'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, <3 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, <5 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, <6 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, <7 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, <9 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, <10 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, <11 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, <12 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, <13 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, <14 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, <15 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, <17 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, <18 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, <19 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, <20 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, <21 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, <22 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, <23 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, <24 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, <25 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, <26 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, <27 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, <28 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, <29 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, <30 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, <31 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, <33 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, <34 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, <35 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, <36 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, <37 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, <38 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, <39 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, <40 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %161 = load <41 x i1>, <41 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %162 = load <42 x i1>, <42 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %163 = load <43 x i1>, <43 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %164 = load <44 x i1>, <44 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %165 = load <45 x i1>, <45 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %166 = load <46 x i1>, <46 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %167 = load <47 x i1>, <47 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, <48 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %169 = load <49 x i1>, <49 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %170 = load <50 x i1>, <50 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %171 = load <51 x i1>, <51 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %172 = load <52 x i1>, <52 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %173 = load <53 x i1>, <53 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %174 = load <54 x i1>, <54 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %175 = load <55 x i1>, <55 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %176 = load <56 x i1>, <56 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %177 = load <57 x i1>, <57 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %178 = load <58 x i1>, <58 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %179 = load <59 x i1>, <59 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %180 = load <60 x i1>, <60 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %181 = load <61 x i1>, <61 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %182 = load <62 x i1>, <62 x i1>* undef, align 4
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %183 = load <63 x i1>, <63 x i1>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <3 x i64>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load <3 x double>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %13 = load <3 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <5 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = load <7 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <9 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %19 = load <11 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <13 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = load <15 x i32>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = load <3 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <7 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %32 = load <11 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %36 = load <15 x float>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %39 = load <3 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %41 = load <5 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %42 = load <6 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %43 = load <7 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %46 = load <11 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %48 = load <13 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %49 = load <14 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %50 = load <15 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %53 = load <19 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %55 = load <21 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %56 = load <22 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %57 = load <23 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %61 = load <27 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %63 = load <29 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %64 = load <30 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %65 = load <31 x i16>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %70 = load <5 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %71 = load <6 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %72 = load <7 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %74 = load <9 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %75 = load <10 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %76 = load <11 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %77 = load <12 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %78 = load <13 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %79 = load <14 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %80 = load <15 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %83 = load <19 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %85 = load <21 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %86 = load <22 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %87 = load <23 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %89 = load <25 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %90 = load <26 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %92 = load <28 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %93 = load <29 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %94 = load <30 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %95 = load <31 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %98 = load <35 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %100 = load <37 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %101 = load <38 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %102 = load <39 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %104 = load <41 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %105 = load <42 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %106 = load <43 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %107 = load <44 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %108 = load <45 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %109 = load <46 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %110 = load <47 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %114 = load <51 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %116 = load <53 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %117 = load <54 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %118 = load <55 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %120 = load <57 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %121 = load <58 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %122 = load <59 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %123 = load <60 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %124 = load <61 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %125 = load <62 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %126 = load <63 x i8>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %161 = load <41 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %162 = load <42 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %163 = load <43 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %164 = load <44 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %165 = load <45 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %166 = load <46 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %167 = load <47 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %169 = load <49 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %170 = load <50 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %171 = load <51 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %172 = load <52 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %173 = load <53 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %174 = load <54 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %175 = load <55 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %176 = load <56 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %177 = load <57 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %178 = load <58 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %179 = load <59 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %180 = load <60 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %181 = load <61 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %182 = load <62 x i1>, ptr undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %183 = load <63 x i1>, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'loads_partial_align4'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, <3 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, <5 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, <6 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, <7 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, <9 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, <10 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, <11 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, <12 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, <13 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, <14 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, <15 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, <17 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, <18 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, <19 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, <20 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, <21 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, <22 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, <23 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, <24 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, <25 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, <26 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, <27 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, <28 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, <29 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, <30 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, <31 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, <33 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, <34 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, <35 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, <36 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, <37 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, <38 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, <39 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, <40 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %161 = load <41 x i1>, <41 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %162 = load <42 x i1>, <42 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %163 = load <43 x i1>, <43 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %164 = load <44 x i1>, <44 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %165 = load <45 x i1>, <45 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %166 = load <46 x i1>, <46 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %167 = load <47 x i1>, <47 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, <48 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %169 = load <49 x i1>, <49 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %170 = load <50 x i1>, <50 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %171 = load <51 x i1>, <51 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %172 = load <52 x i1>, <52 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %173 = load <53 x i1>, <53 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %174 = load <54 x i1>, <54 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %175 = load <55 x i1>, <55 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %176 = load <56 x i1>, <56 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %177 = load <57 x i1>, <57 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %178 = load <58 x i1>, <58 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %179 = load <59 x i1>, <59 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %180 = load <60 x i1>, <60 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %181 = load <61 x i1>, <61 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %182 = load <62 x i1>, <62 x i1>* undef, align 4
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %183 = load <63 x i1>, <63 x i1>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <3 x i64>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load <3 x double>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <5 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <7 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <9 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <13 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <15 x i32>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = load <7 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %36 = load <15 x float>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %49 = load <14 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %50 = load <15 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %56 = load <22 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %64 = load <30 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %72 = load <7 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %76 = load <11 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %83 = load <19 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %85 = load <21 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %86 = load <22 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %87 = load <23 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %89 = load <25 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %90 = load <26 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %91 = load <27 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %92 = load <28 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %93 = load <29 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %94 = load <30 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %95 = load <31 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %98 = load <35 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %100 = load <37 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %101 = load <38 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %102 = load <39 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %106 = load <43 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %107 = load <44 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %109 = load <46 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %110 = load <47 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %114 = load <51 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %116 = load <53 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %117 = load <54 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %118 = load <55 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %120 = load <57 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %121 = load <58 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %122 = load <59 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %123 = load <60 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %125 = load <62 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %126 = load <63 x i8>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %161 = load <41 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %162 = load <42 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %163 = load <43 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %164 = load <44 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %165 = load <45 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %166 = load <46 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %167 = load <47 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %169 = load <49 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %170 = load <50 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %171 = load <51 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %172 = load <52 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %173 = load <53 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %174 = load <54 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %175 = load <55 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %176 = load <56 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %177 = load <57 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %178 = load <58 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %179 = load <59 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %180 = load <60 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %181 = load <61 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %182 = load <62 x i1>, ptr undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %183 = load <63 x i1>, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'loads_partial_align4'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, <3 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, <5 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, <6 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, <7 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, <9 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, <10 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, <11 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, <12 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, <13 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, <14 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, <15 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, <17 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, <18 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, <19 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, <20 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, <21 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, <22 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, <23 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, <24 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, <25 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, <26 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, <27 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, <28 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, <29 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, <30 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, <31 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, <33 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, <34 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, <35 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, <36 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, <37 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, <38 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, <39 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, <40 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %161 = load <41 x i1>, <41 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %162 = load <42 x i1>, <42 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %163 = load <43 x i1>, <43 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %164 = load <44 x i1>, <44 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %165 = load <45 x i1>, <45 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %166 = load <46 x i1>, <46 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %167 = load <47 x i1>, <47 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, <48 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %169 = load <49 x i1>, <49 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %170 = load <50 x i1>, <50 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %171 = load <51 x i1>, <51 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %172 = load <52 x i1>, <52 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %173 = load <53 x i1>, <53 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %174 = load <54 x i1>, <54 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %175 = load <55 x i1>, <55 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %176 = load <56 x i1>, <56 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %177 = load <57 x i1>, <57 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %178 = load <58 x i1>, <58 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %179 = load <59 x i1>, <59 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %180 = load <60 x i1>, <60 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %181 = load <61 x i1>, <61 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %182 = load <62 x i1>, <62 x i1>* undef, align 4
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %183 = load <63 x i1>, <63 x i1>* undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = load <3 x i64>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = load <5 x i64>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load <6 x i64>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = load <3 x double>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <5 x double>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <6 x double>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = load <5 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <6 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <7 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <9 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <10 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = load <11 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = load <12 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %21 = load <13 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <15 x i32>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = load <5 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <6 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %29 = load <7 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <9 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = load <10 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <11 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <12 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %36 = load <15 x float>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %44 = load <9 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <10 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %47 = load <12 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %48 = load <13 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %49 = load <14 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %51 = load <17 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %52 = load <18 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %53 = load <19 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %54 = load <20 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %55 = load <21 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %56 = load <22 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %58 = load <24 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %59 = load <25 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %60 = load <26 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %61 = load <27 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %64 = load <30 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %72 = load <7 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %76 = load <11 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %81 = load <17 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %82 = load <18 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %83 = load <19 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %84 = load <20 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %86 = load <22 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %87 = load <23 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %88 = load <24 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %89 = load <25 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %90 = load <26 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %91 = load <27 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %92 = load <28 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %94 = load <30 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %95 = load <31 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %96 = load <33 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %97 = load <34 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %98 = load <35 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %99 = load <36 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %100 = load <37 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %101 = load <38 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %102 = load <39 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %103 = load <40 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %104 = load <41 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %105 = load <42 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %106 = load <43 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %107 = load <44 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %108 = load <45 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %109 = load <46 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %110 = load <47 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %111 = load <48 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %112 = load <49 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %113 = load <50 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %114 = load <51 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %115 = load <52 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %116 = load <53 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %117 = load <54 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %118 = load <55 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %120 = load <57 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %121 = load <58 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %122 = load <59 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %123 = load <60 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %125 = load <62 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %126 = load <63 x i8>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %161 = load <41 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %162 = load <42 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %163 = load <43 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %164 = load <44 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %165 = load <45 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %166 = load <46 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %167 = load <47 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %169 = load <49 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %170 = load <50 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %171 = load <51 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %172 = load <52 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %173 = load <53 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %174 = load <54 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %175 = load <55 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %176 = load <56 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %177 = load <57 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %178 = load <58 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %179 = load <59 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %180 = load <60 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %181 = load <61 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %182 = load <62 x i1>, ptr undef, align 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %183 = load <63 x i1>, ptr undef, align 4
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'loads_partial_align4'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, <3 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, <5 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, <6 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, <7 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, <9 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, <10 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, <11 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, <12 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, <13 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, <14 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, <15 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, <17 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, <18 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, <19 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, <20 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, <21 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, <22 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, <23 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, <24 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, <25 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, <26 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, <27 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, <28 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, <29 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, <30 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, <31 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %153 = load <33 x i1>, <33 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %154 = load <34 x i1>, <34 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %155 = load <35 x i1>, <35 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %156 = load <36 x i1>, <36 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %157 = load <37 x i1>, <37 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %158 = load <38 x i1>, <38 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %159 = load <39 x i1>, <39 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %160 = load <40 x i1>, <40 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %161 = load <41 x i1>, <41 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %162 = load <42 x i1>, <42 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %163 = load <43 x i1>, <43 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %164 = load <44 x i1>, <44 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %165 = load <45 x i1>, <45 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %166 = load <46 x i1>, <46 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %167 = load <47 x i1>, <47 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %168 = load <48 x i1>, <48 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %169 = load <49 x i1>, <49 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %170 = load <50 x i1>, <50 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %171 = load <51 x i1>, <51 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %172 = load <52 x i1>, <52 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %173 = load <53 x i1>, <53 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %174 = load <54 x i1>, <54 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %175 = load <55 x i1>, <55 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %176 = load <56 x i1>, <56 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %177 = load <57 x i1>, <57 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %178 = load <58 x i1>, <58 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %179 = load <59 x i1>, <59 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %180 = load <60 x i1>, <60 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %181 = load <61 x i1>, <61 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %182 = load <62 x i1>, <62 x i1>* undef, align 4
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %183 = load <63 x i1>, <63 x i1>* undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = load <3 x i64>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %5 = load <7 x i64>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = load <3 x double>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %10 = load <7 x double>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = load <5 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <6 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <7 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %17 = load <9 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <13 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %22 = load <14 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = load <15 x i32>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = load <5 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <6 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %29 = load <7 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %34 = load <13 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %35 = load <14 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %36 = load <15 x float>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %44 = load <9 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <10 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %47 = load <12 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %48 = load <13 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %49 = load <14 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %52 = load <18 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %53 = load <19 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %56 = load <22 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %59 = load <25 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %60 = load <26 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %61 = load <27 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %62 = load <28 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %64 = load <30 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %65 = load <31 x i16>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %72 = load <7 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %76 = load <11 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %80 = load <15 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %81 = load <17 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %82 = load <18 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %83 = load <19 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %84 = load <20 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %86 = load <22 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %87 = load <23 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %88 = load <24 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %89 = load <25 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %90 = load <26 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %91 = load <27 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %92 = load <28 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %94 = load <30 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %95 = load <31 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %96 = load <33 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %97 = load <34 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %98 = load <35 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %99 = load <36 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %100 = load <37 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %101 = load <38 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %102 = load <39 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %106 = load <43 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %107 = load <44 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %109 = load <46 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %110 = load <47 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %112 = load <49 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %113 = load <50 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %114 = load <51 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %115 = load <52 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %116 = load <53 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %117 = load <54 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %118 = load <55 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %119 = load <56 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %120 = load <57 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %121 = load <58 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %122 = load <59 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %123 = load <60 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %124 = load <61 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %125 = load <62 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %126 = load <63 x i8>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %153 = load <33 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %154 = load <34 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %155 = load <35 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %156 = load <36 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %157 = load <37 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %158 = load <38 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %159 = load <39 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %160 = load <40 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %161 = load <41 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %162 = load <42 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %163 = load <43 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %164 = load <44 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %165 = load <45 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %166 = load <46 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %167 = load <47 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %168 = load <48 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %169 = load <49 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %170 = load <50 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %171 = load <51 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %172 = load <52 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %173 = load <53 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %174 = load <54 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %175 = load <55 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %176 = load <56 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %177 = load <57 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %178 = load <58 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %179 = load <59 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %180 = load <60 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %181 = load <61 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %182 = load <62 x i1>, ptr undef, align 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %183 = load <63 x i1>, ptr undef, align 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- load <1 x i64>, <1 x i64>* undef, align 4
+ load <1 x i64>, ptr undef, align 4
; <2 x i64> is XMM
- load <3 x i64>, <3 x i64>* undef, align 4
+ load <3 x i64>, ptr undef, align 4
; <4 x i64> is YMM
- load <5 x i64>, <5 x i64>* undef, align 4
- load <6 x i64>, <6 x i64>* undef, align 4
- load <7 x i64>, <7 x i64>* undef, align 4
+ load <5 x i64>, ptr undef, align 4
+ load <6 x i64>, ptr undef, align 4
+ load <7 x i64>, ptr undef, align 4
; <8 x i64> is ZMM
; Partial vectors with double elements
- load <1 x double>, <1 x double>* undef, align 4
+ load <1 x double>, ptr undef, align 4
; <2 x double> is XMM
- load <3 x double>, <3 x double>* undef, align 4
+ load <3 x double>, ptr undef, align 4
; <4 x double> is YMM
- load <5 x double>, <5 x double>* undef, align 4
- load <6 x double>, <6 x double>* undef, align 4
- load <7 x double>, <7 x double>* undef, align 4
+ load <5 x double>, ptr undef, align 4
+ load <6 x double>, ptr undef, align 4
+ load <7 x double>, ptr undef, align 4
; <8 x double> is ZMM
; Partial vectors with i32 elements
- load <1 x i32>, <1 x i32>* undef, align 4
- load <2 x i32>, <2 x i32>* undef, align 4
- load <3 x i32>, <3 x i32>* undef, align 4
+ load <1 x i32>, ptr undef, align 4
+ load <2 x i32>, ptr undef, align 4
+ load <3 x i32>, ptr undef, align 4
; <4 x i32> is XMM
- load <5 x i32>, <5 x i32>* undef, align 4
- load <6 x i32>, <6 x i32>* undef, align 4
- load <7 x i32>, <7 x i32>* undef, align 4
+ load <5 x i32>, ptr undef, align 4
+ load <6 x i32>, ptr undef, align 4
+ load <7 x i32>, ptr undef, align 4
; <8 x i32> is YMM
- load <9 x i32>, <9 x i32>* undef, align 4
- load <10 x i32>, <10 x i32>* undef, align 4
- load <11 x i32>, <11 x i32>* undef, align 4
- load <12 x i32>, <12 x i32>* undef, align 4
- load <13 x i32>, <13 x i32>* undef, align 4
- load <14 x i32>, <14 x i32>* undef, align 4
- load <15 x i32>, <15 x i32>* undef, align 4
+ load <9 x i32>, ptr undef, align 4
+ load <10 x i32>, ptr undef, align 4
+ load <11 x i32>, ptr undef, align 4
+ load <12 x i32>, ptr undef, align 4
+ load <13 x i32>, ptr undef, align 4
+ load <14 x i32>, ptr undef, align 4
+ load <15 x i32>, ptr undef, align 4
; <16 x i32> is ZMM
; Partial vectors with float elements
- load <1 x float>, <1 x float>* undef, align 4
- load <2 x float>, <2 x float>* undef, align 4
- load <3 x float>, <3 x float>* undef, align 4
+ load <1 x float>, ptr undef, align 4
+ load <2 x float>, ptr undef, align 4
+ load <3 x float>, ptr undef, align 4
; <4 x float> is XMM
- load <5 x float>, <5 x float>* undef, align 4
- load <6 x float>, <6 x float>* undef, align 4
- load <7 x float>, <7 x float>* undef, align 4
+ load <5 x float>, ptr undef, align 4
+ load <6 x float>, ptr undef, align 4
+ load <7 x float>, ptr undef, align 4
; <8 x float> is YMM
- load <9 x float>, <9 x float>* undef, align 4
- load <10 x float>, <10 x float>* undef, align 4
- load <11 x float>, <11 x float>* undef, align 4
- load <12 x float>, <12 x float>* undef, align 4
- load <13 x float>, <13 x float>* undef, align 4
- load <14 x float>, <14 x float>* undef, align 4
- load <15 x float>, <15 x float>* undef, align 4
+ load <9 x float>, ptr undef, align 4
+ load <10 x float>, ptr undef, align 4
+ load <11 x float>, ptr undef, align 4
+ load <12 x float>, ptr undef, align 4
+ load <13 x float>, ptr undef, align 4
+ load <14 x float>, ptr undef, align 4
+ load <15 x float>, ptr undef, align 4
; <16 x float> is ZMM
; Partial vectors with i16 elements
- load <1 x i16>, <1 x i16>* undef, align 4
- load <2 x i16>, <2 x i16>* undef, align 4
- load <3 x i16>, <3 x i16>* undef, align 4
- load <4 x i16>, <4 x i16>* undef, align 4
- load <5 x i16>, <5 x i16>* undef, align 4
- load <6 x i16>, <6 x i16>* undef, align 4
- load <7 x i16>, <7 x i16>* undef, align 4
+ load <1 x i16>, ptr undef, align 4
+ load <2 x i16>, ptr undef, align 4
+ load <3 x i16>, ptr undef, align 4
+ load <4 x i16>, ptr undef, align 4
+ load <5 x i16>, ptr undef, align 4
+ load <6 x i16>, ptr undef, align 4
+ load <7 x i16>, ptr undef, align 4
; <8 x i16> is XMM
- load <9 x i16>, <9 x i16>* undef, align 4
- load <10 x i16>, <10 x i16>* undef, align 4
- load <11 x i16>, <11 x i16>* undef, align 4
- load <12 x i16>, <12 x i16>* undef, align 4
- load <13 x i16>, <13 x i16>* undef, align 4
- load <14 x i16>, <14 x i16>* undef, align 4
- load <15 x i16>, <15 x i16>* undef, align 4
+ load <9 x i16>, ptr undef, align 4
+ load <10 x i16>, ptr undef, align 4
+ load <11 x i16>, ptr undef, align 4
+ load <12 x i16>, ptr undef, align 4
+ load <13 x i16>, ptr undef, align 4
+ load <14 x i16>, ptr undef, align 4
+ load <15 x i16>, ptr undef, align 4
; <16 x i16> is YMM
- load <17 x i16>, <17 x i16>* undef, align 4
- load <18 x i16>, <18 x i16>* undef, align 4
- load <19 x i16>, <19 x i16>* undef, align 4
- load <20 x i16>, <20 x i16>* undef, align 4
- load <21 x i16>, <21 x i16>* undef, align 4
- load <22 x i16>, <22 x i16>* undef, align 4
- load <23 x i16>, <23 x i16>* undef, align 4
- load <24 x i16>, <24 x i16>* undef, align 4
- load <25 x i16>, <25 x i16>* undef, align 4
- load <26 x i16>, <26 x i16>* undef, align 4
- load <27 x i16>, <27 x i16>* undef, align 4
- load <28 x i16>, <28 x i16>* undef, align 4
- load <29 x i16>, <29 x i16>* undef, align 4
- load <30 x i16>, <30 x i16>* undef, align 4
- load <31 x i16>, <31 x i16>* undef, align 4
+ load <17 x i16>, ptr undef, align 4
+ load <18 x i16>, ptr undef, align 4
+ load <19 x i16>, ptr undef, align 4
+ load <20 x i16>, ptr undef, align 4
+ load <21 x i16>, ptr undef, align 4
+ load <22 x i16>, ptr undef, align 4
+ load <23 x i16>, ptr undef, align 4
+ load <24 x i16>, ptr undef, align 4
+ load <25 x i16>, ptr undef, align 4
+ load <26 x i16>, ptr undef, align 4
+ load <27 x i16>, ptr undef, align 4
+ load <28 x i16>, ptr undef, align 4
+ load <29 x i16>, ptr undef, align 4
+ load <30 x i16>, ptr undef, align 4
+ load <31 x i16>, ptr undef, align 4
; <32 x i16> is ZMM
; Partial vectors with i8 elements
- load <1 x i8>, <1 x i8>* undef, align 4
- load <2 x i8>, <2 x i8>* undef, align 4
- load <3 x i8>, <3 x i8>* undef, align 4
- load <4 x i8>, <4 x i8>* undef, align 4
- load <5 x i8>, <5 x i8>* undef, align 4
- load <6 x i8>, <6 x i8>* undef, align 4
- load <7 x i8>, <7 x i8>* undef, align 4
- load <8 x i8>, <8 x i8>* undef, align 4
- load <9 x i8>, <9 x i8>* undef, align 4
- load <10 x i8>, <10 x i8>* undef, align 4
- load <11 x i8>, <11 x i8>* undef, align 4
- load <12 x i8>, <12 x i8>* undef, align 4
- load <13 x i8>, <13 x i8>* undef, align 4
- load <14 x i8>, <14 x i8>* undef, align 4
- load <15 x i8>, <15 x i8>* undef, align 4
+ load <1 x i8>, ptr undef, align 4
+ load <2 x i8>, ptr undef, align 4
+ load <3 x i8>, ptr undef, align 4
+ load <4 x i8>, ptr undef, align 4
+ load <5 x i8>, ptr undef, align 4
+ load <6 x i8>, ptr undef, align 4
+ load <7 x i8>, ptr undef, align 4
+ load <8 x i8>, ptr undef, align 4
+ load <9 x i8>, ptr undef, align 4
+ load <10 x i8>, ptr undef, align 4
+ load <11 x i8>, ptr undef, align 4
+ load <12 x i8>, ptr undef, align 4
+ load <13 x i8>, ptr undef, align 4
+ load <14 x i8>, ptr undef, align 4
+ load <15 x i8>, ptr undef, align 4
; <16 x i8> is XMM
- load <17 x i8>, <17 x i8>* undef, align 4
- load <18 x i8>, <18 x i8>* undef, align 4
- load <19 x i8>, <19 x i8>* undef, align 4
- load <20 x i8>, <20 x i8>* undef, align 4
- load <21 x i8>, <21 x i8>* undef, align 4
- load <22 x i8>, <22 x i8>* undef, align 4
- load <23 x i8>, <23 x i8>* undef, align 4
- load <24 x i8>, <24 x i8>* undef, align 4
- load <25 x i8>, <25 x i8>* undef, align 4
- load <26 x i8>, <26 x i8>* undef, align 4
- load <27 x i8>, <27 x i8>* undef, align 4
- load <28 x i8>, <28 x i8>* undef, align 4
- load <29 x i8>, <29 x i8>* undef, align 4
- load <30 x i8>, <30 x i8>* undef, align 4
- load <31 x i8>, <31 x i8>* undef, align 4
+ load <17 x i8>, ptr undef, align 4
+ load <18 x i8>, ptr undef, align 4
+ load <19 x i8>, ptr undef, align 4
+ load <20 x i8>, ptr undef, align 4
+ load <21 x i8>, ptr undef, align 4
+ load <22 x i8>, ptr undef, align 4
+ load <23 x i8>, ptr undef, align 4
+ load <24 x i8>, ptr undef, align 4
+ load <25 x i8>, ptr undef, align 4
+ load <26 x i8>, ptr undef, align 4
+ load <27 x i8>, ptr undef, align 4
+ load <28 x i8>, ptr undef, align 4
+ load <29 x i8>, ptr undef, align 4
+ load <30 x i8>, ptr undef, align 4
+ load <31 x i8>, ptr undef, align 4
; <32 x i8> is YMM
- load <33 x i8>, <33 x i8>* undef, align 4
- load <34 x i8>, <34 x i8>* undef, align 4
- load <35 x i8>, <35 x i8>* undef, align 4
- load <36 x i8>, <36 x i8>* undef, align 4
- load <37 x i8>, <37 x i8>* undef, align 4
- load <38 x i8>, <38 x i8>* undef, align 4
- load <39 x i8>, <39 x i8>* undef, align 4
- load <40 x i8>, <40 x i8>* undef, align 4
- load <41 x i8>, <41 x i8>* undef, align 4
- load <42 x i8>, <42 x i8>* undef, align 4
- load <43 x i8>, <43 x i8>* undef, align 4
- load <44 x i8>, <44 x i8>* undef, align 4
- load <45 x i8>, <45 x i8>* undef, align 4
- load <46 x i8>, <46 x i8>* undef, align 4
- load <47 x i8>, <47 x i8>* undef, align 4
- load <48 x i8>, <48 x i8>* undef, align 4
- load <49 x i8>, <49 x i8>* undef, align 4
- load <50 x i8>, <50 x i8>* undef, align 4
- load <51 x i8>, <51 x i8>* undef, align 4
- load <52 x i8>, <52 x i8>* undef, align 4
- load <53 x i8>, <53 x i8>* undef, align 4
- load <54 x i8>, <54 x i8>* undef, align 4
- load <55 x i8>, <55 x i8>* undef, align 4
- load <56 x i8>, <56 x i8>* undef, align 4
- load <57 x i8>, <57 x i8>* undef, align 4
- load <58 x i8>, <58 x i8>* undef, align 4
- load <59 x i8>, <59 x i8>* undef, align 4
- load <60 x i8>, <60 x i8>* undef, align 4
- load <61 x i8>, <61 x i8>* undef, align 4
- load <62 x i8>, <62 x i8>* undef, align 4
- load <63 x i8>, <63 x i8>* undef, align 4
+ load <33 x i8>, ptr undef, align 4
+ load <34 x i8>, ptr undef, align 4
+ load <35 x i8>, ptr undef, align 4
+ load <36 x i8>, ptr undef, align 4
+ load <37 x i8>, ptr undef, align 4
+ load <38 x i8>, ptr undef, align 4
+ load <39 x i8>, ptr undef, align 4
+ load <40 x i8>, ptr undef, align 4
+ load <41 x i8>, ptr undef, align 4
+ load <42 x i8>, ptr undef, align 4
+ load <43 x i8>, ptr undef, align 4
+ load <44 x i8>, ptr undef, align 4
+ load <45 x i8>, ptr undef, align 4
+ load <46 x i8>, ptr undef, align 4
+ load <47 x i8>, ptr undef, align 4
+ load <48 x i8>, ptr undef, align 4
+ load <49 x i8>, ptr undef, align 4
+ load <50 x i8>, ptr undef, align 4
+ load <51 x i8>, ptr undef, align 4
+ load <52 x i8>, ptr undef, align 4
+ load <53 x i8>, ptr undef, align 4
+ load <54 x i8>, ptr undef, align 4
+ load <55 x i8>, ptr undef, align 4
+ load <56 x i8>, ptr undef, align 4
+ load <57 x i8>, ptr undef, align 4
+ load <58 x i8>, ptr undef, align 4
+ load <59 x i8>, ptr undef, align 4
+ load <60 x i8>, ptr undef, align 4
+ load <61 x i8>, ptr undef, align 4
+ load <62 x i8>, ptr undef, align 4
+ load <63 x i8>, ptr undef, align 4
; <64 x i8> is ZMM
; Partial vectors with i1 elements
; <1 x i1> is XMM
; <2 x i1> is XMM
- load <3 x i1>, <3 x i1>* undef, align 4
+ load <3 x i1>, ptr undef, align 4
; <4 x i1> is XMM
- load <5 x i1>, <5 x i1>* undef, align 4
- load <6 x i1>, <6 x i1>* undef, align 4
- load <7 x i1>, <7 x i1>* undef, align 4
+ load <5 x i1>, ptr undef, align 4
+ load <6 x i1>, ptr undef, align 4
+ load <7 x i1>, ptr undef, align 4
; <8 x i1> is XMM
- load <9 x i1>, <9 x i1>* undef, align 4
- load <10 x i1>, <10 x i1>* undef, align 4
- load <11 x i1>, <11 x i1>* undef, align 4
- load <12 x i1>, <12 x i1>* undef, align 4
- load <13 x i1>, <13 x i1>* undef, align 4
- load <14 x i1>, <14 x i1>* undef, align 4
- load <15 x i1>, <15 x i1>* undef, align 4
+ load <9 x i1>, ptr undef, align 4
+ load <10 x i1>, ptr undef, align 4
+ load <11 x i1>, ptr undef, align 4
+ load <12 x i1>, ptr undef, align 4
+ load <13 x i1>, ptr undef, align 4
+ load <14 x i1>, ptr undef, align 4
+ load <15 x i1>, ptr undef, align 4
; <16 x i1> is XMM
- load <17 x i1>, <17 x i1>* undef, align 4
- load <18 x i1>, <18 x i1>* undef, align 4
- load <19 x i1>, <19 x i1>* undef, align 4
- load <20 x i1>, <20 x i1>* undef, align 4
- load <21 x i1>, <21 x i1>* undef, align 4
- load <22 x i1>, <22 x i1>* undef, align 4
- load <23 x i1>, <23 x i1>* undef, align 4
- load <24 x i1>, <24 x i1>* undef, align 4
- load <25 x i1>, <25 x i1>* undef, align 4
- load <26 x i1>, <26 x i1>* undef, align 4
- load <27 x i1>, <27 x i1>* undef, align 4
- load <28 x i1>, <28 x i1>* undef, align 4
- load <29 x i1>, <29 x i1>* undef, align 4
- load <30 x i1>, <30 x i1>* undef, align 4
- load <31 x i1>, <31 x i1>* undef, align 4
+ load <17 x i1>, ptr undef, align 4
+ load <18 x i1>, ptr undef, align 4
+ load <19 x i1>, ptr undef, align 4
+ load <20 x i1>, ptr undef, align 4
+ load <21 x i1>, ptr undef, align 4
+ load <22 x i1>, ptr undef, align 4
+ load <23 x i1>, ptr undef, align 4
+ load <24 x i1>, ptr undef, align 4
+ load <25 x i1>, ptr undef, align 4
+ load <26 x i1>, ptr undef, align 4
+ load <27 x i1>, ptr undef, align 4
+ load <28 x i1>, ptr undef, align 4
+ load <29 x i1>, ptr undef, align 4
+ load <30 x i1>, ptr undef, align 4
+ load <31 x i1>, ptr undef, align 4
; <32 x i1> is YMM
- load <33 x i1>, <33 x i1>* undef, align 4
- load <34 x i1>, <34 x i1>* undef, align 4
- load <35 x i1>, <35 x i1>* undef, align 4
- load <36 x i1>, <36 x i1>* undef, align 4
- load <37 x i1>, <37 x i1>* undef, align 4
- load <38 x i1>, <38 x i1>* undef, align 4
- load <39 x i1>, <39 x i1>* undef, align 4
- load <40 x i1>, <40 x i1>* undef, align 4
- load <41 x i1>, <41 x i1>* undef, align 4
- load <42 x i1>, <42 x i1>* undef, align 4
- load <43 x i1>, <43 x i1>* undef, align 4
- load <44 x i1>, <44 x i1>* undef, align 4
- load <45 x i1>, <45 x i1>* undef, align 4
- load <46 x i1>, <46 x i1>* undef, align 4
- load <47 x i1>, <47 x i1>* undef, align 4
- load <48 x i1>, <48 x i1>* undef, align 4
- load <49 x i1>, <49 x i1>* undef, align 4
- load <50 x i1>, <50 x i1>* undef, align 4
- load <51 x i1>, <51 x i1>* undef, align 4
- load <52 x i1>, <52 x i1>* undef, align 4
- load <53 x i1>, <53 x i1>* undef, align 4
- load <54 x i1>, <54 x i1>* undef, align 4
- load <55 x i1>, <55 x i1>* undef, align 4
- load <56 x i1>, <56 x i1>* undef, align 4
- load <57 x i1>, <57 x i1>* undef, align 4
- load <58 x i1>, <58 x i1>* undef, align 4
- load <59 x i1>, <59 x i1>* undef, align 4
- load <60 x i1>, <60 x i1>* undef, align 4
- load <61 x i1>, <61 x i1>* undef, align 4
- load <62 x i1>, <62 x i1>* undef, align 4
- load <63 x i1>, <63 x i1>* undef, align 4
+ load <33 x i1>, ptr undef, align 4
+ load <34 x i1>, ptr undef, align 4
+ load <35 x i1>, ptr undef, align 4
+ load <36 x i1>, ptr undef, align 4
+ load <37 x i1>, ptr undef, align 4
+ load <38 x i1>, ptr undef, align 4
+ load <39 x i1>, ptr undef, align 4
+ load <40 x i1>, ptr undef, align 4
+ load <41 x i1>, ptr undef, align 4
+ load <42 x i1>, ptr undef, align 4
+ load <43 x i1>, ptr undef, align 4
+ load <44 x i1>, ptr undef, align 4
+ load <45 x i1>, ptr undef, align 4
+ load <46 x i1>, ptr undef, align 4
+ load <47 x i1>, ptr undef, align 4
+ load <48 x i1>, ptr undef, align 4
+ load <49 x i1>, ptr undef, align 4
+ load <50 x i1>, ptr undef, align 4
+ load <51 x i1>, ptr undef, align 4
+ load <52 x i1>, ptr undef, align 4
+ load <53 x i1>, ptr undef, align 4
+ load <54 x i1>, ptr undef, align 4
+ load <55 x i1>, ptr undef, align 4
+ load <56 x i1>, ptr undef, align 4
+ load <57 x i1>, ptr undef, align 4
+ load <58 x i1>, ptr undef, align 4
+ load <59 x i1>, ptr undef, align 4
+ load <60 x i1>, ptr undef, align 4
+ load <61 x i1>, ptr undef, align 4
+ load <62 x i1>, ptr undef, align 4
+ load <63 x i1>, ptr undef, align 4
; <64 x i1> is ZMM
ret i32 undef
define i32 @loads_align1(i32 %arg) {
; Scalars
; SSE2-LABEL: 'loads_align1'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load i1*, i1** undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, <128 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, <16 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, <8 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, <4 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, <2 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, <1 x i128>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, <2 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x i8*>, <2 x i8*>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, <16 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, <8 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, <4 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, <2 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, <1 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, <256 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, <32 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <16 x i16>, <16 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <8 x i32>, <8 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <4 x i64>, <4 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, <2 x i128>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, <1 x i256>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <8 x float>, <8 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <4 x double>, <4 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <4 x i8*>, <4 x i8*>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, <32 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load <512 x i1>, <512 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = load <64 x i8>, <64 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = load <32 x i16>, <32 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %40 = load <16 x i32>, <16 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %41 = load <8 x i64>, <8 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, <4 x i128>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, <2 x i256>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, <1 x i512>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <16 x float>, <16 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <8 x double>, <8 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %47 = load <8 x i8*>, <8 x i8*>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, <64 x i1>* undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load ptr, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load ptr, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <16 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <8 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <4 x i64>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <8 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <4 x double>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <4 x ptr>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load <512 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = load <64 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = load <32 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %40 = load <16 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %41 = load <8 x i64>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <16 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <8 x double>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %47 = load <8 x ptr>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, ptr undef, align 1
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'loads_align1'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load i1*, i1** undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, <128 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, <16 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, <8 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, <4 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, <2 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, <1 x i128>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, <2 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x i8*>, <2 x i8*>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, <16 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, <8 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, <4 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, <2 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, <1 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, <256 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, <32 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <16 x i16>, <16 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <8 x i32>, <8 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <4 x i64>, <4 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, <2 x i128>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, <1 x i256>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <8 x float>, <8 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <4 x double>, <4 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <4 x i8*>, <4 x i8*>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, <32 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load <512 x i1>, <512 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = load <64 x i8>, <64 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = load <32 x i16>, <32 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %40 = load <16 x i32>, <16 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %41 = load <8 x i64>, <8 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, <4 x i128>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, <2 x i256>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, <1 x i512>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <16 x float>, <16 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <8 x double>, <8 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %47 = load <8 x i8*>, <8 x i8*>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, <64 x i1>* undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load ptr, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load ptr, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <16 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <8 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <4 x i64>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <8 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <4 x double>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <4 x ptr>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load <512 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = load <64 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = load <32 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %40 = load <16 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %41 = load <8 x i64>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <16 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <8 x double>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %47 = load <8 x ptr>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, ptr undef, align 1
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'loads_align1'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load i1*, i1** undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, <128 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, <16 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, <8 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, <4 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, <2 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, <1 x i128>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, <2 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x i8*>, <2 x i8*>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, <16 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, <8 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, <4 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, <2 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, <1 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <256 x i1>, <256 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, <32 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, <16 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <8 x i32>, <8 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <4 x i64>, <4 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, <2 x i128>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, <1 x i256>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <8 x float>, <8 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <4 x double>, <4 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <4 x i8*>, <4 x i8*>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, <32 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %37 = load <512 x i1>, <512 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %38 = load <64 x i8>, <64 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = load <32 x i16>, <32 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %40 = load <16 x i32>, <16 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %41 = load <8 x i64>, <8 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, <4 x i128>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, <2 x i256>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, <1 x i512>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <16 x float>, <16 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %46 = load <8 x double>, <8 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <8 x i8*>, <8 x i8*>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, <64 x i1>* undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load ptr, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load ptr, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <256 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <8 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <4 x i64>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <8 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <4 x double>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <4 x ptr>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %37 = load <512 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %38 = load <64 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = load <32 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %40 = load <16 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %41 = load <8 x i64>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <16 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %46 = load <8 x double>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <8 x ptr>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, ptr undef, align 1
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'loads_align1'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load i1*, i1** undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = load <128 x i1>, <128 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, <16 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, <8 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, <4 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, <2 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, <1 x i128>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, <2 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x i8*>, <2 x i8*>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, <16 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, <8 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, <4 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, <2 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, <1 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = load <256 x i1>, <256 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, <32 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, <16 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <8 x i32>, <8 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <4 x i64>, <4 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, <2 x i128>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, <1 x i256>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <8 x float>, <8 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <4 x double>, <4 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <4 x i8*>, <4 x i8*>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, <32 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %37 = load <512 x i1>, <512 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <64 x i8>, <64 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <32 x i16>, <32 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <16 x i32>, <16 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <8 x i64>, <8 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, <4 x i128>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, <2 x i256>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, <1 x i512>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = load <16 x float>, <16 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = load <8 x double>, <8 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %47 = load <8 x i8*>, <8 x i8*>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, <64 x i1>* undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load ptr, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load ptr, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = load <128 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = load <256 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <8 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <4 x i64>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <8 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <4 x double>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <4 x ptr>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %37 = load <512 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <64 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <32 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <16 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <8 x i64>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = load <16 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = load <8 x double>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %47 = load <8 x ptr>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, ptr undef, align 1
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- load i8, i8* undef, align 1
- load i16, i16* undef, align 1
- load i32, i32* undef, align 1
- load i64, i64* undef, align 1
- load i128, i128* undef, align 1
- load i256, i256* undef, align 1
- load i512, i512* undef, align 1
+ load i8, ptr undef, align 1
+ load i16, ptr undef, align 1
+ load i32, ptr undef, align 1
+ load i64, ptr undef, align 1
+ load i128, ptr undef, align 1
+ load i256, ptr undef, align 1
+ load i512, ptr undef, align 1
- load float, float* undef, align 1
- load double, double* undef, align 1
+ load float, ptr undef, align 1
+ load double, ptr undef, align 1
- load i8*, i8** undef, align 1
+ load ptr, ptr undef, align 1
- load i1*, i1** undef, align 1
+ load ptr, ptr undef, align 1
; XMM (128-bit) vectors
- load <128 x i1>, <128 x i1>* undef, align 1
- load <16 x i8>, <16 x i8>* undef, align 1
- load <8 x i16>, <8 x i16>* undef, align 1
- load <4 x i32>, <4 x i32>* undef, align 1
- load <2 x i64>, <2 x i64>* undef, align 1
- load <1 x i128>, <1 x i128>* undef, align 1
+ load <128 x i1>, ptr undef, align 1
+ load <16 x i8>, ptr undef, align 1
+ load <8 x i16>, ptr undef, align 1
+ load <4 x i32>, ptr undef, align 1
+ load <2 x i64>, ptr undef, align 1
+ load <1 x i128>, ptr undef, align 1
- load <4 x float>, <4 x float>* undef, align 1
- load <2 x double>, <2 x double>* undef, align 1
+ load <4 x float>, ptr undef, align 1
+ load <2 x double>, ptr undef, align 1
- load <2 x i8*>, <2 x i8*>* undef, align 1
+ load <2 x ptr>, ptr undef, align 1
- load <16 x i1>, <16 x i1>* undef, align 1
- load <8 x i1>, <8 x i1>* undef, align 1
- load <4 x i1>, <4 x i1>* undef, align 1
- load <2 x i1>, <2 x i1>* undef, align 1
- load <1 x i1>, <1 x i1>* undef, align 1
+ load <16 x i1>, ptr undef, align 1
+ load <8 x i1>, ptr undef, align 1
+ load <4 x i1>, ptr undef, align 1
+ load <2 x i1>, ptr undef, align 1
+ load <1 x i1>, ptr undef, align 1
; YMM (256-bit) vectors
- load <256 x i1>, <256 x i1>* undef, align 1
- load <32 x i8>, <32 x i8>* undef, align 1
- load <16 x i16>, <16 x i16>* undef, align 1
- load <8 x i32>, <8 x i32>* undef, align 1
- load <4 x i64>, <4 x i64>* undef, align 1
- load <2 x i128>, <2 x i128>* undef, align 1
- load <1 x i256>, <1 x i256>* undef, align 1
+ load <256 x i1>, ptr undef, align 1
+ load <32 x i8>, ptr undef, align 1
+ load <16 x i16>, ptr undef, align 1
+ load <8 x i32>, ptr undef, align 1
+ load <4 x i64>, ptr undef, align 1
+ load <2 x i128>, ptr undef, align 1
+ load <1 x i256>, ptr undef, align 1
- load <8 x float>, <8 x float>* undef, align 1
- load <4 x double>, <4 x double>* undef, align 1
+ load <8 x float>, ptr undef, align 1
+ load <4 x double>, ptr undef, align 1
- load <4 x i8*>, <4 x i8*>* undef, align 1
+ load <4 x ptr>, ptr undef, align 1
- load <32 x i1>, <32 x i1>* undef, align 1
+ load <32 x i1>, ptr undef, align 1
; ZMM (512-bit) vectors
- load <512 x i1>, <512 x i1>* undef, align 1
- load <64 x i8>, <64 x i8>* undef, align 1
- load <32 x i16>, <32 x i16>* undef, align 1
- load <16 x i32>, <16 x i32>* undef, align 1
- load <8 x i64>, <8 x i64>* undef, align 1
- load <4 x i128>, <4 x i128>* undef, align 1
- load <2 x i256>, <2 x i256>* undef, align 1
- load <1 x i512>, <1 x i512>* undef, align 1
+ load <512 x i1>, ptr undef, align 1
+ load <64 x i8>, ptr undef, align 1
+ load <32 x i16>, ptr undef, align 1
+ load <16 x i32>, ptr undef, align 1
+ load <8 x i64>, ptr undef, align 1
+ load <4 x i128>, ptr undef, align 1
+ load <2 x i256>, ptr undef, align 1
+ load <1 x i512>, ptr undef, align 1
- load <16 x float>, <16 x float>* undef, align 1
- load <8 x double>, <8 x double>* undef, align 1
+ load <16 x float>, ptr undef, align 1
+ load <8 x double>, ptr undef, align 1
- load <8 x i8*>, <8 x i8*>* undef, align 1
+ load <8 x ptr>, ptr undef, align 1
- load <64 x i1>, <64 x i1>* undef, align 1
+ load <64 x i1>, ptr undef, align 1
ret i32 undef
}
define i32 @loads_partial_align1(i32 %arg) {
; Partial vectors with i64 elements (doubles as pointer-sized tests))
; SSE2-LABEL: 'loads_partial_align1'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, <3 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, <5 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, <6 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, <7 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %131 = load <9 x i1>, <9 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %132 = load <10 x i1>, <10 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %133 = load <11 x i1>, <11 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %134 = load <12 x i1>, <12 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %135 = load <13 x i1>, <13 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %136 = load <14 x i1>, <14 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %137 = load <15 x i1>, <15 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %138 = load <17 x i1>, <17 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %139 = load <18 x i1>, <18 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %140 = load <19 x i1>, <19 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %141 = load <20 x i1>, <20 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %142 = load <21 x i1>, <21 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %143 = load <22 x i1>, <22 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %144 = load <23 x i1>, <23 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %145 = load <24 x i1>, <24 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %146 = load <25 x i1>, <25 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %147 = load <26 x i1>, <26 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %148 = load <27 x i1>, <27 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %149 = load <28 x i1>, <28 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %150 = load <29 x i1>, <29 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %151 = load <30 x i1>, <30 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %152 = load <31 x i1>, <31 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, <33 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, <34 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, <35 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, <36 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, <37 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, <38 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, <39 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, <40 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %161 = load <41 x i1>, <41 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %162 = load <42 x i1>, <42 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %163 = load <43 x i1>, <43 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %164 = load <44 x i1>, <44 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %165 = load <45 x i1>, <45 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %166 = load <46 x i1>, <46 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %167 = load <47 x i1>, <47 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, <48 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %169 = load <49 x i1>, <49 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %170 = load <50 x i1>, <50 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %171 = load <51 x i1>, <51 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %172 = load <52 x i1>, <52 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %173 = load <53 x i1>, <53 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %174 = load <54 x i1>, <54 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %175 = load <55 x i1>, <55 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %176 = load <56 x i1>, <56 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %177 = load <57 x i1>, <57 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %178 = load <58 x i1>, <58 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %179 = load <59 x i1>, <59 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %180 = load <60 x i1>, <60 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %181 = load <61 x i1>, <61 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %182 = load <62 x i1>, <62 x i1>* undef, align 1
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %183 = load <63 x i1>, <63 x i1>* undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <3 x i64>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load <3 x double>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %13 = load <3 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <5 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = load <7 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <9 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %19 = load <11 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <13 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = load <15 x i32>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = load <3 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %29 = load <7 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %32 = load <11 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %36 = load <15 x float>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %42 = load <6 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <7 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %49 = load <14 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %50 = load <15 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %56 = load <22 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %57 = load <23 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %64 = load <30 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %65 = load <31 x i16>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %68 = load <3 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %70 = load <5 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %72 = load <7 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %74 = load <9 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %76 = load <11 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %77 = load <12 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %78 = load <13 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %79 = load <14 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %80 = load <15 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %83 = load <19 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %85 = load <21 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %86 = load <22 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %87 = load <23 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %89 = load <25 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %90 = load <26 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %91 = load <27 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %92 = load <28 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %93 = load <29 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %94 = load <30 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %95 = load <31 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %98 = load <35 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %100 = load <37 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %101 = load <38 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %102 = load <39 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %104 = load <41 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %106 = load <43 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %107 = load <44 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %108 = load <45 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %109 = load <46 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %110 = load <47 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %114 = load <51 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %116 = load <53 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %117 = load <54 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %118 = load <55 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %120 = load <57 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %121 = load <58 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %122 = load <59 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %123 = load <60 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %124 = load <61 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %125 = load <62 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %126 = load <63 x i8>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %131 = load <9 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %132 = load <10 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %133 = load <11 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %134 = load <12 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %135 = load <13 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %136 = load <14 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %137 = load <15 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %138 = load <17 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %139 = load <18 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %140 = load <19 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %141 = load <20 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %142 = load <21 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %143 = load <22 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %144 = load <23 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %145 = load <24 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %146 = load <25 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %147 = load <26 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %148 = load <27 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %149 = load <28 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %150 = load <29 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %151 = load <30 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %152 = load <31 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %161 = load <41 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %162 = load <42 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %163 = load <43 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %164 = load <44 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %165 = load <45 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %166 = load <46 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %167 = load <47 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %169 = load <49 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %170 = load <50 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %171 = load <51 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %172 = load <52 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %173 = load <53 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %174 = load <54 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %175 = load <55 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %176 = load <56 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %177 = load <57 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %178 = load <58 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %179 = load <59 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %180 = load <60 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %181 = load <61 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %182 = load <62 x i1>, ptr undef, align 1
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %183 = load <63 x i1>, ptr undef, align 1
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'loads_partial_align1'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, <3 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, <5 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, <6 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, <7 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %131 = load <9 x i1>, <9 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %132 = load <10 x i1>, <10 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %133 = load <11 x i1>, <11 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %134 = load <12 x i1>, <12 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %135 = load <13 x i1>, <13 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %136 = load <14 x i1>, <14 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %137 = load <15 x i1>, <15 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %138 = load <17 x i1>, <17 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %139 = load <18 x i1>, <18 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %140 = load <19 x i1>, <19 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %141 = load <20 x i1>, <20 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %142 = load <21 x i1>, <21 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %143 = load <22 x i1>, <22 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %144 = load <23 x i1>, <23 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %145 = load <24 x i1>, <24 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %146 = load <25 x i1>, <25 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %147 = load <26 x i1>, <26 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %148 = load <27 x i1>, <27 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %149 = load <28 x i1>, <28 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %150 = load <29 x i1>, <29 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %151 = load <30 x i1>, <30 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %152 = load <31 x i1>, <31 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, <33 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, <34 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, <35 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, <36 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, <37 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, <38 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, <39 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, <40 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %161 = load <41 x i1>, <41 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %162 = load <42 x i1>, <42 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %163 = load <43 x i1>, <43 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %164 = load <44 x i1>, <44 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %165 = load <45 x i1>, <45 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %166 = load <46 x i1>, <46 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %167 = load <47 x i1>, <47 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, <48 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %169 = load <49 x i1>, <49 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %170 = load <50 x i1>, <50 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %171 = load <51 x i1>, <51 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %172 = load <52 x i1>, <52 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %173 = load <53 x i1>, <53 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %174 = load <54 x i1>, <54 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %175 = load <55 x i1>, <55 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %176 = load <56 x i1>, <56 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %177 = load <57 x i1>, <57 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %178 = load <58 x i1>, <58 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %179 = load <59 x i1>, <59 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %180 = load <60 x i1>, <60 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %181 = load <61 x i1>, <61 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %182 = load <62 x i1>, <62 x i1>* undef, align 1
-; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %183 = load <63 x i1>, <63 x i1>* undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <3 x i64>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load <3 x double>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <5 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <7 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <9 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <13 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <15 x i32>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = load <7 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %36 = load <15 x float>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <11 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %48 = load <13 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %49 = load <14 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %50 = load <15 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %53 = load <19 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %56 = load <22 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %61 = load <27 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %64 = load <30 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %72 = load <7 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %76 = load <11 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %80 = load <15 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %83 = load <19 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %85 = load <21 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %86 = load <22 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %87 = load <23 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %89 = load <25 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %90 = load <26 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %91 = load <27 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %92 = load <28 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %93 = load <29 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %94 = load <30 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %95 = load <31 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %98 = load <35 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %100 = load <37 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %101 = load <38 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %102 = load <39 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %106 = load <43 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %107 = load <44 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %109 = load <46 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %110 = load <47 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %114 = load <51 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %116 = load <53 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %117 = load <54 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %118 = load <55 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %120 = load <57 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %121 = load <58 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %122 = load <59 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %123 = load <60 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %125 = load <62 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %126 = load <63 x i8>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %131 = load <9 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %132 = load <10 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %133 = load <11 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %134 = load <12 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %135 = load <13 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %136 = load <14 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %137 = load <15 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %138 = load <17 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %139 = load <18 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %140 = load <19 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %141 = load <20 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %142 = load <21 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %143 = load <22 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %144 = load <23 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %145 = load <24 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %146 = load <25 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %147 = load <26 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %148 = load <27 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %149 = load <28 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %150 = load <29 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %151 = load <30 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %152 = load <31 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %161 = load <41 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %162 = load <42 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %163 = load <43 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %164 = load <44 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %165 = load <45 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %166 = load <46 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %167 = load <47 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %169 = load <49 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %170 = load <50 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %171 = load <51 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %172 = load <52 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %173 = load <53 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %174 = load <54 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %175 = load <55 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %176 = load <56 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %177 = load <57 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %178 = load <58 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %179 = load <59 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %180 = load <60 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %181 = load <61 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %182 = load <62 x i1>, ptr undef, align 1
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %183 = load <63 x i1>, ptr undef, align 1
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'loads_partial_align1'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, <3 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, <5 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, <6 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, <7 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %131 = load <9 x i1>, <9 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %132 = load <10 x i1>, <10 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %133 = load <11 x i1>, <11 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %134 = load <12 x i1>, <12 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %135 = load <13 x i1>, <13 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %136 = load <14 x i1>, <14 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %137 = load <15 x i1>, <15 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %138 = load <17 x i1>, <17 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %139 = load <18 x i1>, <18 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %140 = load <19 x i1>, <19 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %141 = load <20 x i1>, <20 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %142 = load <21 x i1>, <21 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %143 = load <22 x i1>, <22 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %144 = load <23 x i1>, <23 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %145 = load <24 x i1>, <24 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %146 = load <25 x i1>, <25 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %147 = load <26 x i1>, <26 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %148 = load <27 x i1>, <27 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %149 = load <28 x i1>, <28 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %150 = load <29 x i1>, <29 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %151 = load <30 x i1>, <30 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %152 = load <31 x i1>, <31 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, <33 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, <34 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, <35 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, <36 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, <37 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, <38 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, <39 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, <40 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %161 = load <41 x i1>, <41 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %162 = load <42 x i1>, <42 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %163 = load <43 x i1>, <43 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %164 = load <44 x i1>, <44 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %165 = load <45 x i1>, <45 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %166 = load <46 x i1>, <46 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %167 = load <47 x i1>, <47 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, <48 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %169 = load <49 x i1>, <49 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %170 = load <50 x i1>, <50 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %171 = load <51 x i1>, <51 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %172 = load <52 x i1>, <52 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %173 = load <53 x i1>, <53 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %174 = load <54 x i1>, <54 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %175 = load <55 x i1>, <55 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %176 = load <56 x i1>, <56 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %177 = load <57 x i1>, <57 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %178 = load <58 x i1>, <58 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %179 = load <59 x i1>, <59 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %180 = load <60 x i1>, <60 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %181 = load <61 x i1>, <61 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %182 = load <62 x i1>, <62 x i1>* undef, align 1
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %183 = load <63 x i1>, <63 x i1>* undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = load <3 x i64>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = load <5 x i64>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load <6 x i64>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = load <3 x double>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <5 x double>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <6 x double>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = load <5 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <6 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <7 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <9 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <10 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = load <11 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = load <12 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %21 = load <13 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %23 = load <15 x i32>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = load <5 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <6 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %29 = load <7 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <9 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = load <10 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <11 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <12 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %36 = load <15 x float>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %44 = load <9 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <10 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %47 = load <12 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %48 = load <13 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %49 = load <14 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %51 = load <17 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %52 = load <18 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %53 = load <19 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %54 = load <20 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %55 = load <21 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %56 = load <22 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %57 = load <23 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %58 = load <24 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %59 = load <25 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %60 = load <26 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %61 = load <27 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %63 = load <29 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %64 = load <30 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %65 = load <31 x i16>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %72 = load <7 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %76 = load <11 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %80 = load <15 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %81 = load <17 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %82 = load <18 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %83 = load <19 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %84 = load <20 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %86 = load <22 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %87 = load <23 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %88 = load <24 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %89 = load <25 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %90 = load <26 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %92 = load <28 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %94 = load <30 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %95 = load <31 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %96 = load <33 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %97 = load <34 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %98 = load <35 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %99 = load <36 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %100 = load <37 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %101 = load <38 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %102 = load <39 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %103 = load <40 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %104 = load <41 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %105 = load <42 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %106 = load <43 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %107 = load <44 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %108 = load <45 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %109 = load <46 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %110 = load <47 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %111 = load <48 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %112 = load <49 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %113 = load <50 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %114 = load <51 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %115 = load <52 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %116 = load <53 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %117 = load <54 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %118 = load <55 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %120 = load <57 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %121 = load <58 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %122 = load <59 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %123 = load <60 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %124 = load <61 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %125 = load <62 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %126 = load <63 x i8>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %131 = load <9 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %132 = load <10 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %133 = load <11 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %134 = load <12 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %135 = load <13 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %136 = load <14 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %137 = load <15 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %138 = load <17 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %139 = load <18 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %140 = load <19 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %141 = load <20 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %142 = load <21 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %143 = load <22 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %144 = load <23 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %145 = load <24 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %146 = load <25 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %147 = load <26 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %148 = load <27 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %149 = load <28 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %150 = load <29 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %151 = load <30 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %152 = load <31 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %153 = load <33 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %154 = load <34 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %155 = load <35 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %156 = load <36 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %157 = load <37 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %158 = load <38 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %159 = load <39 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %160 = load <40 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %161 = load <41 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %162 = load <42 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %163 = load <43 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %164 = load <44 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %165 = load <45 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %166 = load <46 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %167 = load <47 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %168 = load <48 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %169 = load <49 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %170 = load <50 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %171 = load <51 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %172 = load <52 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %173 = load <53 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %174 = load <54 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %175 = load <55 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %176 = load <56 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %177 = load <57 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %178 = load <58 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %179 = load <59 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %180 = load <60 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %181 = load <61 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %182 = load <62 x i1>, ptr undef, align 1
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %183 = load <63 x i1>, ptr undef, align 1
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'loads_partial_align1'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, <3 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, <5 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, <6 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, <7 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %131 = load <9 x i1>, <9 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %132 = load <10 x i1>, <10 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %133 = load <11 x i1>, <11 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %134 = load <12 x i1>, <12 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %135 = load <13 x i1>, <13 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %136 = load <14 x i1>, <14 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %137 = load <15 x i1>, <15 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %138 = load <17 x i1>, <17 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %139 = load <18 x i1>, <18 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %140 = load <19 x i1>, <19 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %141 = load <20 x i1>, <20 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %142 = load <21 x i1>, <21 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %143 = load <22 x i1>, <22 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %144 = load <23 x i1>, <23 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %145 = load <24 x i1>, <24 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %146 = load <25 x i1>, <25 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %147 = load <26 x i1>, <26 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %148 = load <27 x i1>, <27 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %149 = load <28 x i1>, <28 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %150 = load <29 x i1>, <29 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %151 = load <30 x i1>, <30 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %152 = load <31 x i1>, <31 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %153 = load <33 x i1>, <33 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %154 = load <34 x i1>, <34 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %155 = load <35 x i1>, <35 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %156 = load <36 x i1>, <36 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %157 = load <37 x i1>, <37 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %158 = load <38 x i1>, <38 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %159 = load <39 x i1>, <39 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %160 = load <40 x i1>, <40 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %161 = load <41 x i1>, <41 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %162 = load <42 x i1>, <42 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %163 = load <43 x i1>, <43 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %164 = load <44 x i1>, <44 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %165 = load <45 x i1>, <45 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %166 = load <46 x i1>, <46 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %167 = load <47 x i1>, <47 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %168 = load <48 x i1>, <48 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %169 = load <49 x i1>, <49 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %170 = load <50 x i1>, <50 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %171 = load <51 x i1>, <51 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %172 = load <52 x i1>, <52 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %173 = load <53 x i1>, <53 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %174 = load <54 x i1>, <54 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %175 = load <55 x i1>, <55 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %176 = load <56 x i1>, <56 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %177 = load <57 x i1>, <57 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %178 = load <58 x i1>, <58 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %179 = load <59 x i1>, <59 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %180 = load <60 x i1>, <60 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %181 = load <61 x i1>, <61 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %182 = load <62 x i1>, <62 x i1>* undef, align 1
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %183 = load <63 x i1>, <63 x i1>* undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = load <3 x i64>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %5 = load <7 x i64>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = load <3 x double>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %10 = load <7 x double>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %13 = load <3 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %14 = load <5 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %15 = load <6 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = load <7 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %17 = load <9 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = load <11 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %21 = load <13 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %22 = load <14 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = load <15 x i32>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %26 = load <3 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = load <5 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = load <6 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %29 = load <7 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %32 = load <11 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %34 = load <13 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %35 = load <14 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %36 = load <15 x float>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %39 = load <3 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = load <5 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = load <6 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %43 = load <7 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %44 = load <9 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <10 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %46 = load <11 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %47 = load <12 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %48 = load <13 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %49 = load <14 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %50 = load <15 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %51 = load <17 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %52 = load <18 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %53 = load <19 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %55 = load <21 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %56 = load <22 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %57 = load <23 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %59 = load <25 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %60 = load <26 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %61 = load <27 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %62 = load <28 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %63 = load <29 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %64 = load <30 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %65 = load <31 x i16>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %68 = load <3 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %70 = load <5 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %71 = load <6 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %72 = load <7 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %74 = load <9 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %75 = load <10 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %76 = load <11 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %77 = load <12 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %78 = load <13 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %79 = load <14 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %80 = load <15 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %81 = load <17 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %82 = load <18 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %83 = load <19 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %84 = load <20 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %85 = load <21 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %86 = load <22 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %87 = load <23 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %88 = load <24 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %89 = load <25 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %90 = load <26 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %91 = load <27 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %92 = load <28 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %93 = load <29 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %94 = load <30 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %95 = load <31 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %96 = load <33 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %97 = load <34 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %98 = load <35 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %99 = load <36 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %100 = load <37 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %101 = load <38 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %102 = load <39 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %104 = load <41 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %105 = load <42 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %106 = load <43 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %107 = load <44 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %108 = load <45 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %109 = load <46 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %110 = load <47 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %112 = load <49 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %113 = load <50 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %114 = load <51 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %115 = load <52 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %116 = load <53 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %117 = load <54 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %118 = load <55 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %119 = load <56 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %120 = load <57 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %121 = load <58 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %122 = load <59 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %123 = load <60 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %124 = load <61 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %125 = load <62 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %126 = load <63 x i8>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %131 = load <9 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %132 = load <10 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %133 = load <11 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %134 = load <12 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %135 = load <13 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %136 = load <14 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %137 = load <15 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %138 = load <17 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %139 = load <18 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %140 = load <19 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %141 = load <20 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %142 = load <21 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %143 = load <22 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %144 = load <23 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %145 = load <24 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %146 = load <25 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %147 = load <26 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %148 = load <27 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %149 = load <28 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %150 = load <29 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %151 = load <30 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %152 = load <31 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %153 = load <33 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %154 = load <34 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %155 = load <35 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %156 = load <36 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %157 = load <37 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %158 = load <38 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %159 = load <39 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %160 = load <40 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %161 = load <41 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %162 = load <42 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %163 = load <43 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %164 = load <44 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %165 = load <45 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %166 = load <46 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %167 = load <47 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %168 = load <48 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %169 = load <49 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %170 = load <50 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %171 = load <51 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %172 = load <52 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %173 = load <53 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %174 = load <54 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %175 = load <55 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %176 = load <56 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %177 = load <57 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %178 = load <58 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %179 = load <59 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %180 = load <60 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %181 = load <61 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %182 = load <62 x i1>, ptr undef, align 1
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %183 = load <63 x i1>, ptr undef, align 1
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- load <1 x i64>, <1 x i64>* undef, align 1
+ load <1 x i64>, ptr undef, align 1
; <2 x i64> is XMM
- load <3 x i64>, <3 x i64>* undef, align 1
+ load <3 x i64>, ptr undef, align 1
; <4 x i64> is YMM
- load <5 x i64>, <5 x i64>* undef, align 1
- load <6 x i64>, <6 x i64>* undef, align 1
- load <7 x i64>, <7 x i64>* undef, align 1
+ load <5 x i64>, ptr undef, align 1
+ load <6 x i64>, ptr undef, align 1
+ load <7 x i64>, ptr undef, align 1
; <8 x i64> is ZMM
; Partial vectors with double elements
- load <1 x double>, <1 x double>* undef, align 1
+ load <1 x double>, ptr undef, align 1
; <2 x double> is XMM
- load <3 x double>, <3 x double>* undef, align 1
+ load <3 x double>, ptr undef, align 1
; <4 x double> is YMM
- load <5 x double>, <5 x double>* undef, align 1
- load <6 x double>, <6 x double>* undef, align 1
- load <7 x double>, <7 x double>* undef, align 1
+ load <5 x double>, ptr undef, align 1
+ load <6 x double>, ptr undef, align 1
+ load <7 x double>, ptr undef, align 1
; <8 x double> is ZMM
; Partial vectors with i32 elements
- load <1 x i32>, <1 x i32>* undef, align 1
- load <2 x i32>, <2 x i32>* undef, align 1
- load <3 x i32>, <3 x i32>* undef, align 1
+ load <1 x i32>, ptr undef, align 1
+ load <2 x i32>, ptr undef, align 1
+ load <3 x i32>, ptr undef, align 1
; <4 x i32> is XMM
- load <5 x i32>, <5 x i32>* undef, align 1
- load <6 x i32>, <6 x i32>* undef, align 1
- load <7 x i32>, <7 x i32>* undef, align 1
+ load <5 x i32>, ptr undef, align 1
+ load <6 x i32>, ptr undef, align 1
+ load <7 x i32>, ptr undef, align 1
; <8 x i32> is YMM
- load <9 x i32>, <9 x i32>* undef, align 1
- load <10 x i32>, <10 x i32>* undef, align 1
- load <11 x i32>, <11 x i32>* undef, align 1
- load <12 x i32>, <12 x i32>* undef, align 1
- load <13 x i32>, <13 x i32>* undef, align 1
- load <14 x i32>, <14 x i32>* undef, align 1
- load <15 x i32>, <15 x i32>* undef, align 1
+ load <9 x i32>, ptr undef, align 1
+ load <10 x i32>, ptr undef, align 1
+ load <11 x i32>, ptr undef, align 1
+ load <12 x i32>, ptr undef, align 1
+ load <13 x i32>, ptr undef, align 1
+ load <14 x i32>, ptr undef, align 1
+ load <15 x i32>, ptr undef, align 1
; <16 x i32> is ZMM
; Partial vectors with float elements
- load <1 x float>, <1 x float>* undef, align 1
- load <2 x float>, <2 x float>* undef, align 1
- load <3 x float>, <3 x float>* undef, align 1
+ load <1 x float>, ptr undef, align 1
+ load <2 x float>, ptr undef, align 1
+ load <3 x float>, ptr undef, align 1
; <4 x float> is XMM
- load <5 x float>, <5 x float>* undef, align 1
- load <6 x float>, <6 x float>* undef, align 1
- load <7 x float>, <7 x float>* undef, align 1
+ load <5 x float>, ptr undef, align 1
+ load <6 x float>, ptr undef, align 1
+ load <7 x float>, ptr undef, align 1
; <8 x float> is YMM
- load <9 x float>, <9 x float>* undef, align 1
- load <10 x float>, <10 x float>* undef, align 1
- load <11 x float>, <11 x float>* undef, align 1
- load <12 x float>, <12 x float>* undef, align 1
- load <13 x float>, <13 x float>* undef, align 1
- load <14 x float>, <14 x float>* undef, align 1
- load <15 x float>, <15 x float>* undef, align 1
+ load <9 x float>, ptr undef, align 1
+ load <10 x float>, ptr undef, align 1
+ load <11 x float>, ptr undef, align 1
+ load <12 x float>, ptr undef, align 1
+ load <13 x float>, ptr undef, align 1
+ load <14 x float>, ptr undef, align 1
+ load <15 x float>, ptr undef, align 1
; <16 x float> is ZMM
; Partial vectors with i16 elements
- load <1 x i16>, <1 x i16>* undef, align 1
- load <2 x i16>, <2 x i16>* undef, align 1
- load <3 x i16>, <3 x i16>* undef, align 1
- load <4 x i16>, <4 x i16>* undef, align 1
- load <5 x i16>, <5 x i16>* undef, align 1
- load <6 x i16>, <6 x i16>* undef, align 1
- load <7 x i16>, <7 x i16>* undef, align 1
+ load <1 x i16>, ptr undef, align 1
+ load <2 x i16>, ptr undef, align 1
+ load <3 x i16>, ptr undef, align 1
+ load <4 x i16>, ptr undef, align 1
+ load <5 x i16>, ptr undef, align 1
+ load <6 x i16>, ptr undef, align 1
+ load <7 x i16>, ptr undef, align 1
; <8 x i16> is XMM
- load <9 x i16>, <9 x i16>* undef, align 1
- load <10 x i16>, <10 x i16>* undef, align 1
- load <11 x i16>, <11 x i16>* undef, align 1
- load <12 x i16>, <12 x i16>* undef, align 1
- load <13 x i16>, <13 x i16>* undef, align 1
- load <14 x i16>, <14 x i16>* undef, align 1
- load <15 x i16>, <15 x i16>* undef, align 1
+ load <9 x i16>, ptr undef, align 1
+ load <10 x i16>, ptr undef, align 1
+ load <11 x i16>, ptr undef, align 1
+ load <12 x i16>, ptr undef, align 1
+ load <13 x i16>, ptr undef, align 1
+ load <14 x i16>, ptr undef, align 1
+ load <15 x i16>, ptr undef, align 1
; <16 x i16> is YMM
- load <17 x i16>, <17 x i16>* undef, align 1
- load <18 x i16>, <18 x i16>* undef, align 1
- load <19 x i16>, <19 x i16>* undef, align 1
- load <20 x i16>, <20 x i16>* undef, align 1
- load <21 x i16>, <21 x i16>* undef, align 1
- load <22 x i16>, <22 x i16>* undef, align 1
- load <23 x i16>, <23 x i16>* undef, align 1
- load <24 x i16>, <24 x i16>* undef, align 1
- load <25 x i16>, <25 x i16>* undef, align 1
- load <26 x i16>, <26 x i16>* undef, align 1
- load <27 x i16>, <27 x i16>* undef, align 1
- load <28 x i16>, <28 x i16>* undef, align 1
- load <29 x i16>, <29 x i16>* undef, align 1
- load <30 x i16>, <30 x i16>* undef, align 1
- load <31 x i16>, <31 x i16>* undef, align 1
+ load <17 x i16>, ptr undef, align 1
+ load <18 x i16>, ptr undef, align 1
+ load <19 x i16>, ptr undef, align 1
+ load <20 x i16>, ptr undef, align 1
+ load <21 x i16>, ptr undef, align 1
+ load <22 x i16>, ptr undef, align 1
+ load <23 x i16>, ptr undef, align 1
+ load <24 x i16>, ptr undef, align 1
+ load <25 x i16>, ptr undef, align 1
+ load <26 x i16>, ptr undef, align 1
+ load <27 x i16>, ptr undef, align 1
+ load <28 x i16>, ptr undef, align 1
+ load <29 x i16>, ptr undef, align 1
+ load <30 x i16>, ptr undef, align 1
+ load <31 x i16>, ptr undef, align 1
; <32 x i16> is ZMM
; Partial vectors with i8 elements
- load <1 x i8>, <1 x i8>* undef, align 1
- load <2 x i8>, <2 x i8>* undef, align 1
- load <3 x i8>, <3 x i8>* undef, align 1
- load <4 x i8>, <4 x i8>* undef, align 1
- load <5 x i8>, <5 x i8>* undef, align 1
- load <6 x i8>, <6 x i8>* undef, align 1
- load <7 x i8>, <7 x i8>* undef, align 1
- load <8 x i8>, <8 x i8>* undef, align 1
- load <9 x i8>, <9 x i8>* undef, align 1
- load <10 x i8>, <10 x i8>* undef, align 1
- load <11 x i8>, <11 x i8>* undef, align 1
- load <12 x i8>, <12 x i8>* undef, align 1
- load <13 x i8>, <13 x i8>* undef, align 1
- load <14 x i8>, <14 x i8>* undef, align 1
- load <15 x i8>, <15 x i8>* undef, align 1
+ load <1 x i8>, ptr undef, align 1
+ load <2 x i8>, ptr undef, align 1
+ load <3 x i8>, ptr undef, align 1
+ load <4 x i8>, ptr undef, align 1
+ load <5 x i8>, ptr undef, align 1
+ load <6 x i8>, ptr undef, align 1
+ load <7 x i8>, ptr undef, align 1
+ load <8 x i8>, ptr undef, align 1
+ load <9 x i8>, ptr undef, align 1
+ load <10 x i8>, ptr undef, align 1
+ load <11 x i8>, ptr undef, align 1
+ load <12 x i8>, ptr undef, align 1
+ load <13 x i8>, ptr undef, align 1
+ load <14 x i8>, ptr undef, align 1
+ load <15 x i8>, ptr undef, align 1
; <16 x i8> is XMM
- load <17 x i8>, <17 x i8>* undef, align 1
- load <18 x i8>, <18 x i8>* undef, align 1
- load <19 x i8>, <19 x i8>* undef, align 1
- load <20 x i8>, <20 x i8>* undef, align 1
- load <21 x i8>, <21 x i8>* undef, align 1
- load <22 x i8>, <22 x i8>* undef, align 1
- load <23 x i8>, <23 x i8>* undef, align 1
- load <24 x i8>, <24 x i8>* undef, align 1
- load <25 x i8>, <25 x i8>* undef, align 1
- load <26 x i8>, <26 x i8>* undef, align 1
- load <27 x i8>, <27 x i8>* undef, align 1
- load <28 x i8>, <28 x i8>* undef, align 1
- load <29 x i8>, <29 x i8>* undef, align 1
- load <30 x i8>, <30 x i8>* undef, align 1
- load <31 x i8>, <31 x i8>* undef, align 1
+ load <17 x i8>, ptr undef, align 1
+ load <18 x i8>, ptr undef, align 1
+ load <19 x i8>, ptr undef, align 1
+ load <20 x i8>, ptr undef, align 1
+ load <21 x i8>, ptr undef, align 1
+ load <22 x i8>, ptr undef, align 1
+ load <23 x i8>, ptr undef, align 1
+ load <24 x i8>, ptr undef, align 1
+ load <25 x i8>, ptr undef, align 1
+ load <26 x i8>, ptr undef, align 1
+ load <27 x i8>, ptr undef, align 1
+ load <28 x i8>, ptr undef, align 1
+ load <29 x i8>, ptr undef, align 1
+ load <30 x i8>, ptr undef, align 1
+ load <31 x i8>, ptr undef, align 1
; <32 x i8> is YMM
- load <33 x i8>, <33 x i8>* undef, align 1
- load <34 x i8>, <34 x i8>* undef, align 1
- load <35 x i8>, <35 x i8>* undef, align 1
- load <36 x i8>, <36 x i8>* undef, align 1
- load <37 x i8>, <37 x i8>* undef, align 1
- load <38 x i8>, <38 x i8>* undef, align 1
- load <39 x i8>, <39 x i8>* undef, align 1
- load <40 x i8>, <40 x i8>* undef, align 1
- load <41 x i8>, <41 x i8>* undef, align 1
- load <42 x i8>, <42 x i8>* undef, align 1
- load <43 x i8>, <43 x i8>* undef, align 1
- load <44 x i8>, <44 x i8>* undef, align 1
- load <45 x i8>, <45 x i8>* undef, align 1
- load <46 x i8>, <46 x i8>* undef, align 1
- load <47 x i8>, <47 x i8>* undef, align 1
- load <48 x i8>, <48 x i8>* undef, align 1
- load <49 x i8>, <49 x i8>* undef, align 1
- load <50 x i8>, <50 x i8>* undef, align 1
- load <51 x i8>, <51 x i8>* undef, align 1
- load <52 x i8>, <52 x i8>* undef, align 1
- load <53 x i8>, <53 x i8>* undef, align 1
- load <54 x i8>, <54 x i8>* undef, align 1
- load <55 x i8>, <55 x i8>* undef, align 1
- load <56 x i8>, <56 x i8>* undef, align 1
- load <57 x i8>, <57 x i8>* undef, align 1
- load <58 x i8>, <58 x i8>* undef, align 1
- load <59 x i8>, <59 x i8>* undef, align 1
- load <60 x i8>, <60 x i8>* undef, align 1
- load <61 x i8>, <61 x i8>* undef, align 1
- load <62 x i8>, <62 x i8>* undef, align 1
- load <63 x i8>, <63 x i8>* undef, align 1
+ load <33 x i8>, ptr undef, align 1
+ load <34 x i8>, ptr undef, align 1
+ load <35 x i8>, ptr undef, align 1
+ load <36 x i8>, ptr undef, align 1
+ load <37 x i8>, ptr undef, align 1
+ load <38 x i8>, ptr undef, align 1
+ load <39 x i8>, ptr undef, align 1
+ load <40 x i8>, ptr undef, align 1
+ load <41 x i8>, ptr undef, align 1
+ load <42 x i8>, ptr undef, align 1
+ load <43 x i8>, ptr undef, align 1
+ load <44 x i8>, ptr undef, align 1
+ load <45 x i8>, ptr undef, align 1
+ load <46 x i8>, ptr undef, align 1
+ load <47 x i8>, ptr undef, align 1
+ load <48 x i8>, ptr undef, align 1
+ load <49 x i8>, ptr undef, align 1
+ load <50 x i8>, ptr undef, align 1
+ load <51 x i8>, ptr undef, align 1
+ load <52 x i8>, ptr undef, align 1
+ load <53 x i8>, ptr undef, align 1
+ load <54 x i8>, ptr undef, align 1
+ load <55 x i8>, ptr undef, align 1
+ load <56 x i8>, ptr undef, align 1
+ load <57 x i8>, ptr undef, align 1
+ load <58 x i8>, ptr undef, align 1
+ load <59 x i8>, ptr undef, align 1
+ load <60 x i8>, ptr undef, align 1
+ load <61 x i8>, ptr undef, align 1
+ load <62 x i8>, ptr undef, align 1
+ load <63 x i8>, ptr undef, align 1
; <64 x i8> is ZMM
; Partial vectors with i1 elements
; <1 x i1> is XMM
; <2 x i1> is XMM
- load <3 x i1>, <3 x i1>* undef, align 1
+ load <3 x i1>, ptr undef, align 1
; <4 x i1> is XMM
- load <5 x i1>, <5 x i1>* undef, align 1
- load <6 x i1>, <6 x i1>* undef, align 1
- load <7 x i1>, <7 x i1>* undef, align 1
+ load <5 x i1>, ptr undef, align 1
+ load <6 x i1>, ptr undef, align 1
+ load <7 x i1>, ptr undef, align 1
; <8 x i1> is XMM
- load <9 x i1>, <9 x i1>* undef, align 1
- load <10 x i1>, <10 x i1>* undef, align 1
- load <11 x i1>, <11 x i1>* undef, align 1
- load <12 x i1>, <12 x i1>* undef, align 1
- load <13 x i1>, <13 x i1>* undef, align 1
- load <14 x i1>, <14 x i1>* undef, align 1
- load <15 x i1>, <15 x i1>* undef, align 1
+ load <9 x i1>, ptr undef, align 1
+ load <10 x i1>, ptr undef, align 1
+ load <11 x i1>, ptr undef, align 1
+ load <12 x i1>, ptr undef, align 1
+ load <13 x i1>, ptr undef, align 1
+ load <14 x i1>, ptr undef, align 1
+ load <15 x i1>, ptr undef, align 1
; <16 x i1> is XMM
- load <17 x i1>, <17 x i1>* undef, align 1
- load <18 x i1>, <18 x i1>* undef, align 1
- load <19 x i1>, <19 x i1>* undef, align 1
- load <20 x i1>, <20 x i1>* undef, align 1
- load <21 x i1>, <21 x i1>* undef, align 1
- load <22 x i1>, <22 x i1>* undef, align 1
- load <23 x i1>, <23 x i1>* undef, align 1
- load <24 x i1>, <24 x i1>* undef, align 1
- load <25 x i1>, <25 x i1>* undef, align 1
- load <26 x i1>, <26 x i1>* undef, align 1
- load <27 x i1>, <27 x i1>* undef, align 1
- load <28 x i1>, <28 x i1>* undef, align 1
- load <29 x i1>, <29 x i1>* undef, align 1
- load <30 x i1>, <30 x i1>* undef, align 1
- load <31 x i1>, <31 x i1>* undef, align 1
+ load <17 x i1>, ptr undef, align 1
+ load <18 x i1>, ptr undef, align 1
+ load <19 x i1>, ptr undef, align 1
+ load <20 x i1>, ptr undef, align 1
+ load <21 x i1>, ptr undef, align 1
+ load <22 x i1>, ptr undef, align 1
+ load <23 x i1>, ptr undef, align 1
+ load <24 x i1>, ptr undef, align 1
+ load <25 x i1>, ptr undef, align 1
+ load <26 x i1>, ptr undef, align 1
+ load <27 x i1>, ptr undef, align 1
+ load <28 x i1>, ptr undef, align 1
+ load <29 x i1>, ptr undef, align 1
+ load <30 x i1>, ptr undef, align 1
+ load <31 x i1>, ptr undef, align 1
; <32 x i1> is YMM
- load <33 x i1>, <33 x i1>* undef, align 1
- load <34 x i1>, <34 x i1>* undef, align 1
- load <35 x i1>, <35 x i1>* undef, align 1
- load <36 x i1>, <36 x i1>* undef, align 1
- load <37 x i1>, <37 x i1>* undef, align 1
- load <38 x i1>, <38 x i1>* undef, align 1
- load <39 x i1>, <39 x i1>* undef, align 1
- load <40 x i1>, <40 x i1>* undef, align 1
- load <41 x i1>, <41 x i1>* undef, align 1
- load <42 x i1>, <42 x i1>* undef, align 1
- load <43 x i1>, <43 x i1>* undef, align 1
- load <44 x i1>, <44 x i1>* undef, align 1
- load <45 x i1>, <45 x i1>* undef, align 1
- load <46 x i1>, <46 x i1>* undef, align 1
- load <47 x i1>, <47 x i1>* undef, align 1
- load <48 x i1>, <48 x i1>* undef, align 1
- load <49 x i1>, <49 x i1>* undef, align 1
- load <50 x i1>, <50 x i1>* undef, align 1
- load <51 x i1>, <51 x i1>* undef, align 1
- load <52 x i1>, <52 x i1>* undef, align 1
- load <53 x i1>, <53 x i1>* undef, align 1
- load <54 x i1>, <54 x i1>* undef, align 1
- load <55 x i1>, <55 x i1>* undef, align 1
- load <56 x i1>, <56 x i1>* undef, align 1
- load <57 x i1>, <57 x i1>* undef, align 1
- load <58 x i1>, <58 x i1>* undef, align 1
- load <59 x i1>, <59 x i1>* undef, align 1
- load <60 x i1>, <60 x i1>* undef, align 1
- load <61 x i1>, <61 x i1>* undef, align 1
- load <62 x i1>, <62 x i1>* undef, align 1
- load <63 x i1>, <63 x i1>* undef, align 1
+ load <33 x i1>, ptr undef, align 1
+ load <34 x i1>, ptr undef, align 1
+ load <35 x i1>, ptr undef, align 1
+ load <36 x i1>, ptr undef, align 1
+ load <37 x i1>, ptr undef, align 1
+ load <38 x i1>, ptr undef, align 1
+ load <39 x i1>, ptr undef, align 1
+ load <40 x i1>, ptr undef, align 1
+ load <41 x i1>, ptr undef, align 1
+ load <42 x i1>, ptr undef, align 1
+ load <43 x i1>, ptr undef, align 1
+ load <44 x i1>, ptr undef, align 1
+ load <45 x i1>, ptr undef, align 1
+ load <46 x i1>, ptr undef, align 1
+ load <47 x i1>, ptr undef, align 1
+ load <48 x i1>, ptr undef, align 1
+ load <49 x i1>, ptr undef, align 1
+ load <50 x i1>, ptr undef, align 1
+ load <51 x i1>, ptr undef, align 1
+ load <52 x i1>, ptr undef, align 1
+ load <53 x i1>, ptr undef, align 1
+ load <54 x i1>, ptr undef, align 1
+ load <55 x i1>, ptr undef, align 1
+ load <56 x i1>, ptr undef, align 1
+ load <57 x i1>, ptr undef, align 1
+ load <58 x i1>, ptr undef, align 1
+ load <59 x i1>, ptr undef, align 1
+ load <60 x i1>, ptr undef, align 1
+ load <61 x i1>, ptr undef, align 1
+ load <62 x i1>, ptr undef, align 1
+ load <63 x i1>, ptr undef, align 1
; <64 x i1> is ZMM
ret i32 undef
define i32 @loads_align64(i32 %arg) {
; Scalars
; SSE2-LABEL: 'loads_align64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load i1*, i1** undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, <128 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, <16 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, <8 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, <4 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, <2 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, <1 x i128>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, <2 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x i8*>, <2 x i8*>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, <16 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, <8 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, <4 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, <2 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, <1 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, <256 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, <32 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <16 x i16>, <16 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <8 x i32>, <8 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <4 x i64>, <4 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, <2 x i128>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, <1 x i256>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <8 x float>, <8 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <4 x double>, <4 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <4 x i8*>, <4 x i8*>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, <32 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load <512 x i1>, <512 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = load <64 x i8>, <64 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = load <32 x i16>, <32 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %40 = load <16 x i32>, <16 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %41 = load <8 x i64>, <8 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, <4 x i128>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, <2 x i256>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, <1 x i512>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <16 x float>, <16 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <8 x double>, <8 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %47 = load <8 x i8*>, <8 x i8*>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, <64 x i1>* undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load ptr, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load ptr, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <16 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <8 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <4 x i64>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <8 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <4 x double>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <4 x ptr>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load <512 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = load <64 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = load <32 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %40 = load <16 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %41 = load <8 x i64>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <16 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <8 x double>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %47 = load <8 x ptr>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, ptr undef, align 64
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'loads_align64'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load i1*, i1** undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, <128 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, <16 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, <8 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, <4 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, <2 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, <1 x i128>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, <2 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x i8*>, <2 x i8*>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, <16 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, <8 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, <4 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, <2 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, <1 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, <256 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, <32 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <16 x i16>, <16 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <8 x i32>, <8 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <4 x i64>, <4 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, <2 x i128>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, <1 x i256>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <8 x float>, <8 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <4 x double>, <4 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <4 x i8*>, <4 x i8*>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, <32 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load <512 x i1>, <512 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = load <64 x i8>, <64 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = load <32 x i16>, <32 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %40 = load <16 x i32>, <16 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %41 = load <8 x i64>, <8 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, <4 x i128>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, <2 x i256>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, <1 x i512>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <16 x float>, <16 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <8 x double>, <8 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %47 = load <8 x i8*>, <8 x i8*>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, <64 x i1>* undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load ptr, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load ptr, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %26 = load <256 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <32 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <16 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <8 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <4 x i64>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <8 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <4 x double>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <4 x ptr>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %37 = load <512 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = load <64 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %39 = load <32 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %40 = load <16 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %41 = load <8 x i64>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %45 = load <16 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %46 = load <8 x double>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %47 = load <8 x ptr>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, ptr undef, align 64
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'loads_align64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load i1*, i1** undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, <128 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, <16 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, <8 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, <4 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, <2 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, <1 x i128>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, <2 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x i8*>, <2 x i8*>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, <16 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, <8 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, <4 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, <2 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, <1 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <256 x i1>, <256 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, <32 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, <16 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <8 x i32>, <8 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <4 x i64>, <4 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, <2 x i128>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, <1 x i256>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <8 x float>, <8 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <4 x double>, <4 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <4 x i8*>, <4 x i8*>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, <32 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %37 = load <512 x i1>, <512 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %38 = load <64 x i8>, <64 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = load <32 x i16>, <32 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %40 = load <16 x i32>, <16 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %41 = load <8 x i64>, <8 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, <4 x i128>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, <2 x i256>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, <1 x i512>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <16 x float>, <16 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %46 = load <8 x double>, <8 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <8 x i8*>, <8 x i8*>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, <64 x i1>* undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load ptr, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load ptr, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <128 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <256 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <8 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <4 x i64>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <8 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <4 x double>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <4 x ptr>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %37 = load <512 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %38 = load <64 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = load <32 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %40 = load <16 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %41 = load <8 x i64>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <16 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %46 = load <8 x double>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <8 x ptr>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, ptr undef, align 64
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'loads_align64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, i16* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, i32* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, i64* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, i128* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, i256* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, i512* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, float* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, double* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load i8*, i8** undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load i1*, i1** undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = load <128 x i1>, <128 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, <16 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, <8 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, <4 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, <2 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, <1 x i128>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, <4 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, <2 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x i8*>, <2 x i8*>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, <16 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, <8 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, <4 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, <2 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, <1 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = load <256 x i1>, <256 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, <32 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, <16 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <8 x i32>, <8 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <4 x i64>, <4 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, <2 x i128>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, <1 x i256>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <8 x float>, <8 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <4 x double>, <4 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <4 x i8*>, <4 x i8*>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, <32 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %37 = load <512 x i1>, <512 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <64 x i8>, <64 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <32 x i16>, <32 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <16 x i32>, <16 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <8 x i64>, <8 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, <4 x i128>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, <2 x i256>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, <1 x i512>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = load <16 x float>, <16 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = load <8 x double>, <8 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %47 = load <8 x i8*>, <8 x i8*>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, <64 x i1>* undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load i16, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load i32, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load i64, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load i128, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = load i256, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = load i512, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load float, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load double, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load ptr, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load ptr, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = load <128 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <16 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <8 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <4 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <2 x i64>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <1 x i128>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <4 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <2 x double>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <2 x ptr>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <16 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <8 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <4 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <2 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <1 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %26 = load <256 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <32 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <16 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <8 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <4 x i64>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = load <2 x i128>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %32 = load <1 x i256>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <8 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <4 x double>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <4 x ptr>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <32 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %37 = load <512 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <64 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <32 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <16 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <8 x i64>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %42 = load <4 x i128>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %43 = load <2 x i256>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %44 = load <1 x i512>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = load <16 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = load <8 x double>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %47 = load <8 x ptr>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <64 x i1>, ptr undef, align 64
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- load i8, i8* undef, align 64
- load i16, i16* undef, align 64
- load i32, i32* undef, align 64
- load i64, i64* undef, align 64
- load i128, i128* undef, align 64
- load i256, i256* undef, align 64
- load i512, i512* undef, align 64
+ load i8, ptr undef, align 64
+ load i16, ptr undef, align 64
+ load i32, ptr undef, align 64
+ load i64, ptr undef, align 64
+ load i128, ptr undef, align 64
+ load i256, ptr undef, align 64
+ load i512, ptr undef, align 64
- load float, float* undef, align 64
- load double, double* undef, align 64
+ load float, ptr undef, align 64
+ load double, ptr undef, align 64
- load i8*, i8** undef, align 64
+ load ptr, ptr undef, align 64
- load i1*, i1** undef, align 64
+ load ptr, ptr undef, align 64
; XMM (128-bit) vectors
- load <128 x i1>, <128 x i1>* undef, align 64
- load <16 x i8>, <16 x i8>* undef, align 64
- load <8 x i16>, <8 x i16>* undef, align 64
- load <4 x i32>, <4 x i32>* undef, align 64
- load <2 x i64>, <2 x i64>* undef, align 64
- load <1 x i128>, <1 x i128>* undef, align 64
+ load <128 x i1>, ptr undef, align 64
+ load <16 x i8>, ptr undef, align 64
+ load <8 x i16>, ptr undef, align 64
+ load <4 x i32>, ptr undef, align 64
+ load <2 x i64>, ptr undef, align 64
+ load <1 x i128>, ptr undef, align 64
- load <4 x float>, <4 x float>* undef, align 64
- load <2 x double>, <2 x double>* undef, align 64
+ load <4 x float>, ptr undef, align 64
+ load <2 x double>, ptr undef, align 64
- load <2 x i8*>, <2 x i8*>* undef, align 64
+ load <2 x ptr>, ptr undef, align 64
- load <16 x i1>, <16 x i1>* undef, align 64
- load <8 x i1>, <8 x i1>* undef, align 64
- load <4 x i1>, <4 x i1>* undef, align 64
- load <2 x i1>, <2 x i1>* undef, align 64
- load <1 x i1>, <1 x i1>* undef, align 64
+ load <16 x i1>, ptr undef, align 64
+ load <8 x i1>, ptr undef, align 64
+ load <4 x i1>, ptr undef, align 64
+ load <2 x i1>, ptr undef, align 64
+ load <1 x i1>, ptr undef, align 64
; YMM (256-bit) vectors
- load <256 x i1>, <256 x i1>* undef, align 64
- load <32 x i8>, <32 x i8>* undef, align 64
- load <16 x i16>, <16 x i16>* undef, align 64
- load <8 x i32>, <8 x i32>* undef, align 64
- load <4 x i64>, <4 x i64>* undef, align 64
- load <2 x i128>, <2 x i128>* undef, align 64
- load <1 x i256>, <1 x i256>* undef, align 64
+ load <256 x i1>, ptr undef, align 64
+ load <32 x i8>, ptr undef, align 64
+ load <16 x i16>, ptr undef, align 64
+ load <8 x i32>, ptr undef, align 64
+ load <4 x i64>, ptr undef, align 64
+ load <2 x i128>, ptr undef, align 64
+ load <1 x i256>, ptr undef, align 64
- load <8 x float>, <8 x float>* undef, align 64
- load <4 x double>, <4 x double>* undef, align 64
+ load <8 x float>, ptr undef, align 64
+ load <4 x double>, ptr undef, align 64
- load <4 x i8*>, <4 x i8*>* undef, align 64
+ load <4 x ptr>, ptr undef, align 64
- load <32 x i1>, <32 x i1>* undef, align 64
+ load <32 x i1>, ptr undef, align 64
; ZMM (512-bit) vectors
- load <512 x i1>, <512 x i1>* undef, align 64
- load <64 x i8>, <64 x i8>* undef, align 64
- load <32 x i16>, <32 x i16>* undef, align 64
- load <16 x i32>, <16 x i32>* undef, align 64
- load <8 x i64>, <8 x i64>* undef, align 64
- load <4 x i128>, <4 x i128>* undef, align 64
- load <2 x i256>, <2 x i256>* undef, align 64
- load <1 x i512>, <1 x i512>* undef, align 64
+ load <512 x i1>, ptr undef, align 64
+ load <64 x i8>, ptr undef, align 64
+ load <32 x i16>, ptr undef, align 64
+ load <16 x i32>, ptr undef, align 64
+ load <8 x i64>, ptr undef, align 64
+ load <4 x i128>, ptr undef, align 64
+ load <2 x i256>, ptr undef, align 64
+ load <1 x i512>, ptr undef, align 64
- load <16 x float>, <16 x float>* undef, align 64
- load <8 x double>, <8 x double>* undef, align 64
+ load <16 x float>, ptr undef, align 64
+ load <8 x double>, ptr undef, align 64
- load <8 x i8*>, <8 x i8*>* undef, align 64
+ load <8 x ptr>, ptr undef, align 64
- load <64 x i1>, <64 x i1>* undef, align 64
+ load <64 x i1>, ptr undef, align 64
ret i32 undef
}
define i32 @loads_partial_align64(i32 %arg) {
; Partial vectors with i64 elements (doubles as pointer-sized tests))
; SSE2-LABEL: 'loads_partial_align64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, <3 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, <5 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, <6 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, <7 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, <9 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, <10 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, <11 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, <12 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, <13 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, <14 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, <15 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, <17 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, <18 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, <19 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, <20 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, <21 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, <22 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, <23 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, <24 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, <25 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, <26 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, <27 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, <28 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, <29 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, <30 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, <31 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %153 = load <33 x i1>, <33 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %154 = load <34 x i1>, <34 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %155 = load <35 x i1>, <35 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %156 = load <36 x i1>, <36 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %157 = load <37 x i1>, <37 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %158 = load <38 x i1>, <38 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %159 = load <39 x i1>, <39 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %160 = load <40 x i1>, <40 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %161 = load <41 x i1>, <41 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %162 = load <42 x i1>, <42 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %163 = load <43 x i1>, <43 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %164 = load <44 x i1>, <44 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %165 = load <45 x i1>, <45 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %166 = load <46 x i1>, <46 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %167 = load <47 x i1>, <47 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %168 = load <48 x i1>, <48 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %169 = load <49 x i1>, <49 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %170 = load <50 x i1>, <50 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %171 = load <51 x i1>, <51 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %172 = load <52 x i1>, <52 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %173 = load <53 x i1>, <53 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %174 = load <54 x i1>, <54 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %175 = load <55 x i1>, <55 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %176 = load <56 x i1>, <56 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %177 = load <57 x i1>, <57 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %178 = load <58 x i1>, <58 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %179 = load <59 x i1>, <59 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %180 = load <60 x i1>, <60 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %181 = load <61 x i1>, <61 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %182 = load <62 x i1>, <62 x i1>* undef, align 64
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %183 = load <63 x i1>, <63 x i1>* undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <3 x i64>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load <3 x double>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <3 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <5 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = load <7 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <9 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = load <11 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <13 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <15 x i32>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <3 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <7 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %32 = load <11 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %36 = load <15 x float>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <3 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <5 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = load <6 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %43 = load <7 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %46 = load <11 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %48 = load <13 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %49 = load <14 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %50 = load <15 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %53 = load <19 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %55 = load <21 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %56 = load <22 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %57 = load <23 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %61 = load <27 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %63 = load <29 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %64 = load <30 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %65 = load <31 x i16>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %70 = load <5 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %71 = load <6 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %72 = load <7 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %74 = load <9 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %75 = load <10 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %76 = load <11 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %77 = load <12 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %78 = load <13 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %79 = load <14 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %80 = load <15 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %83 = load <19 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %85 = load <21 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %86 = load <22 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %87 = load <23 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %89 = load <25 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %90 = load <26 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %91 = load <27 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %92 = load <28 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %93 = load <29 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %94 = load <30 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %95 = load <31 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %98 = load <35 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %100 = load <37 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %101 = load <38 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %102 = load <39 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %104 = load <41 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %105 = load <42 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %106 = load <43 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %107 = load <44 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %108 = load <45 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %109 = load <46 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %110 = load <47 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %114 = load <51 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %116 = load <53 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %117 = load <54 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %118 = load <55 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %120 = load <57 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %121 = load <58 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %122 = load <59 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %123 = load <60 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %124 = load <61 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %125 = load <62 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %126 = load <63 x i8>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %153 = load <33 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %154 = load <34 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %155 = load <35 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %156 = load <36 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %157 = load <37 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %158 = load <38 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %159 = load <39 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %160 = load <40 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %161 = load <41 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %162 = load <42 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %163 = load <43 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %164 = load <44 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %165 = load <45 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %166 = load <46 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %167 = load <47 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %168 = load <48 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %169 = load <49 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %170 = load <50 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %171 = load <51 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %172 = load <52 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %173 = load <53 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %174 = load <54 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %175 = load <55 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %176 = load <56 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %177 = load <57 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %178 = load <58 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %179 = load <59 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %180 = load <60 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %181 = load <61 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %182 = load <62 x i1>, ptr undef, align 64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %183 = load <63 x i1>, ptr undef, align 64
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE41-LABEL: 'loads_partial_align64'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, <3 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, <5 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, <6 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, <7 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, <9 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, <10 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, <11 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, <12 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, <13 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, <14 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, <15 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, <17 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, <18 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, <19 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, <20 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, <21 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, <22 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, <23 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, <24 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, <25 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, <26 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, <27 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, <28 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, <29 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, <30 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, <31 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %153 = load <33 x i1>, <33 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %154 = load <34 x i1>, <34 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %155 = load <35 x i1>, <35 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %156 = load <36 x i1>, <36 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %157 = load <37 x i1>, <37 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %158 = load <38 x i1>, <38 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %159 = load <39 x i1>, <39 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %160 = load <40 x i1>, <40 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %161 = load <41 x i1>, <41 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %162 = load <42 x i1>, <42 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %163 = load <43 x i1>, <43 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %164 = load <44 x i1>, <44 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %165 = load <45 x i1>, <45 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %166 = load <46 x i1>, <46 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %167 = load <47 x i1>, <47 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %168 = load <48 x i1>, <48 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %169 = load <49 x i1>, <49 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %170 = load <50 x i1>, <50 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %171 = load <51 x i1>, <51 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %172 = load <52 x i1>, <52 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %173 = load <53 x i1>, <53 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %174 = load <54 x i1>, <54 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %175 = load <55 x i1>, <55 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %176 = load <56 x i1>, <56 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %177 = load <57 x i1>, <57 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %178 = load <58 x i1>, <58 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %179 = load <59 x i1>, <59 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %180 = load <60 x i1>, <60 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %181 = load <61 x i1>, <61 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %182 = load <62 x i1>, <62 x i1>* undef, align 64
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %183 = load <63 x i1>, <63 x i1>* undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = load <3 x i64>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = load <5 x i64>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = load <6 x i64>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load <7 x i64>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = load <3 x double>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %8 = load <5 x double>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %9 = load <6 x double>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %10 = load <7 x double>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <3 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <5 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <6 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = load <7 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %17 = load <9 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %18 = load <10 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %19 = load <11 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = load <12 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = load <13 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %22 = load <14 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <15 x i32>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <3 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %27 = load <5 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = load <6 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %29 = load <7 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %30 = load <9 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %31 = load <10 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %32 = load <11 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %33 = load <12 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = load <13 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %35 = load <14 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %36 = load <15 x float>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <3 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <5 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = load <6 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %43 = load <7 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = load <9 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %45 = load <10 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %46 = load <11 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %47 = load <12 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %48 = load <13 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %49 = load <14 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %50 = load <15 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %51 = load <17 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = load <18 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %53 = load <19 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %54 = load <20 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %55 = load <21 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %56 = load <22 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %57 = load <23 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %58 = load <24 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %59 = load <25 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %60 = load <26 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %61 = load <27 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %62 = load <28 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %63 = load <29 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %64 = load <30 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %65 = load <31 x i16>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %70 = load <5 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %71 = load <6 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %72 = load <7 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %74 = load <9 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %75 = load <10 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %76 = load <11 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %77 = load <12 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %78 = load <13 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %79 = load <14 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %80 = load <15 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %81 = load <17 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %82 = load <18 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %83 = load <19 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %84 = load <20 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %85 = load <21 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %86 = load <22 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %87 = load <23 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %88 = load <24 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %89 = load <25 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %90 = load <26 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %91 = load <27 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %92 = load <28 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %93 = load <29 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %94 = load <30 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %95 = load <31 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %96 = load <33 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %97 = load <34 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %98 = load <35 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %99 = load <36 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %100 = load <37 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %101 = load <38 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %102 = load <39 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = load <40 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %104 = load <41 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %105 = load <42 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %106 = load <43 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %107 = load <44 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %108 = load <45 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %109 = load <46 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %110 = load <47 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %111 = load <48 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %112 = load <49 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %113 = load <50 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %114 = load <51 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %115 = load <52 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %116 = load <53 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %117 = load <54 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %118 = load <55 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %119 = load <56 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %120 = load <57 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %121 = load <58 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %122 = load <59 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %123 = load <60 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %124 = load <61 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %125 = load <62 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %126 = load <63 x i8>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %153 = load <33 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %154 = load <34 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %155 = load <35 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %156 = load <36 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %157 = load <37 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %158 = load <38 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %159 = load <39 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %160 = load <40 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %161 = load <41 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %162 = load <42 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %163 = load <43 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %164 = load <44 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %165 = load <45 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %166 = load <46 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %167 = load <47 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %168 = load <48 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %169 = load <49 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %170 = load <50 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %171 = load <51 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %172 = load <52 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %173 = load <53 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %174 = load <54 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %175 = load <55 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %176 = load <56 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %177 = load <57 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %178 = load <58 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %179 = load <59 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %180 = load <60 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %181 = load <61 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %182 = load <62 x i1>, ptr undef, align 64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %183 = load <63 x i1>, ptr undef, align 64
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'loads_partial_align64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, <3 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, <5 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, <6 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, <7 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, <9 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, <10 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, <11 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, <12 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, <13 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, <14 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, <15 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, <17 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, <18 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, <19 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, <20 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, <21 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, <22 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, <23 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, <24 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, <25 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, <26 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, <27 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, <28 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, <29 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, <30 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, <31 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %153 = load <33 x i1>, <33 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %154 = load <34 x i1>, <34 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %155 = load <35 x i1>, <35 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %156 = load <36 x i1>, <36 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %157 = load <37 x i1>, <37 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %158 = load <38 x i1>, <38 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %159 = load <39 x i1>, <39 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %160 = load <40 x i1>, <40 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %161 = load <41 x i1>, <41 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %162 = load <42 x i1>, <42 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %163 = load <43 x i1>, <43 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %164 = load <44 x i1>, <44 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %165 = load <45 x i1>, <45 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %166 = load <46 x i1>, <46 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %167 = load <47 x i1>, <47 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %168 = load <48 x i1>, <48 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %169 = load <49 x i1>, <49 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %170 = load <50 x i1>, <50 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %171 = load <51 x i1>, <51 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %172 = load <52 x i1>, <52 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %173 = load <53 x i1>, <53 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %174 = load <54 x i1>, <54 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %175 = load <55 x i1>, <55 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %176 = load <56 x i1>, <56 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %177 = load <57 x i1>, <57 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %178 = load <58 x i1>, <58 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %179 = load <59 x i1>, <59 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %180 = load <60 x i1>, <60 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %181 = load <61 x i1>, <61 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %182 = load <62 x i1>, <62 x i1>* undef, align 64
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %183 = load <63 x i1>, <63 x i1>* undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load <3 x i64>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = load <5 x i64>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = load <6 x i64>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = load <7 x i64>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load <3 x double>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = load <5 x double>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = load <6 x double>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = load <7 x double>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <3 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <5 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <6 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <7 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = load <9 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <10 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <11 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = load <12 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = load <13 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <14 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <15 x i32>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <3 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <5 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <6 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <7 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %30 = load <9 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = load <10 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %32 = load <11 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = load <12 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %34 = load <13 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %35 = load <14 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %36 = load <15 x float>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <3 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <5 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = load <6 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %43 = load <7 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %44 = load <9 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = load <10 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = load <11 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %47 = load <12 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <13 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %49 = load <14 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %50 = load <15 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %51 = load <17 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %52 = load <18 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %53 = load <19 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %54 = load <20 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %55 = load <21 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %56 = load <22 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %57 = load <23 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %58 = load <24 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %59 = load <25 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %60 = load <26 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %61 = load <27 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %62 = load <28 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %63 = load <29 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %64 = load <30 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %65 = load <31 x i16>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %70 = load <5 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %71 = load <6 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %72 = load <7 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %74 = load <9 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %75 = load <10 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %76 = load <11 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %77 = load <12 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %78 = load <13 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %79 = load <14 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %80 = load <15 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %81 = load <17 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %82 = load <18 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %83 = load <19 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %84 = load <20 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %85 = load <21 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %86 = load <22 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %87 = load <23 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %88 = load <24 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %89 = load <25 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %90 = load <26 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %91 = load <27 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %92 = load <28 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %93 = load <29 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %94 = load <30 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %95 = load <31 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %96 = load <33 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %97 = load <34 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %98 = load <35 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %99 = load <36 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %100 = load <37 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %101 = load <38 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %102 = load <39 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %103 = load <40 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %104 = load <41 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %105 = load <42 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %106 = load <43 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %107 = load <44 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %108 = load <45 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %109 = load <46 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %110 = load <47 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %111 = load <48 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %112 = load <49 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %113 = load <50 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %114 = load <51 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %115 = load <52 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %116 = load <53 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %117 = load <54 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %118 = load <55 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %119 = load <56 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %120 = load <57 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %121 = load <58 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %122 = load <59 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %123 = load <60 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %124 = load <61 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %125 = load <62 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %126 = load <63 x i8>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %153 = load <33 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %154 = load <34 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %155 = load <35 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %156 = load <36 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %157 = load <37 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %158 = load <38 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %159 = load <39 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %160 = load <40 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %161 = load <41 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %162 = load <42 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %163 = load <43 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %164 = load <44 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %165 = load <45 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %166 = load <46 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %167 = load <47 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %168 = load <48 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %169 = load <49 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %170 = load <50 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %171 = load <51 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %172 = load <52 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %173 = load <53 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %174 = load <54 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %175 = load <55 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %176 = load <56 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %177 = load <57 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %178 = load <58 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %179 = load <59 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %180 = load <60 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %181 = load <61 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %182 = load <62 x i1>, ptr undef, align 64
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %183 = load <63 x i1>, ptr undef, align 64
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'loads_partial_align64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, <1 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load <3 x i64>, <3 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load <5 x i64>, <5 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load <6 x i64>, <6 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = load <7 x i64>, <7 x i64>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, <1 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load <3 x double>, <3 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <5 x double>, <5 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <6 x double>, <6 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <7 x double>, <7 x double>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, <1 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, <2 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <3 x i32>, <3 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <5 x i32>, <5 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <6 x i32>, <6 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <7 x i32>, <7 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <9 x i32>, <9 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <10 x i32>, <10 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <11 x i32>, <11 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <12 x i32>, <12 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <13 x i32>, <13 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <14 x i32>, <14 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <15 x i32>, <15 x i32>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, <1 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, <2 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <3 x float>, <3 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <5 x float>, <5 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <6 x float>, <6 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <7 x float>, <7 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <9 x float>, <9 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = load <10 x float>, <10 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = load <11 x float>, <11 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <12 x float>, <12 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <13 x float>, <13 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <14 x float>, <14 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <15 x float>, <15 x float>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, <1 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, <2 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <3 x i16>, <3 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, <4 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <5 x i16>, <5 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = load <6 x i16>, <6 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %43 = load <7 x i16>, <7 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %44 = load <9 x i16>, <9 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = load <10 x i16>, <10 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = load <11 x i16>, <11 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %47 = load <12 x i16>, <12 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <13 x i16>, <13 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %49 = load <14 x i16>, <14 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %50 = load <15 x i16>, <15 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %51 = load <17 x i16>, <17 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %52 = load <18 x i16>, <18 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %53 = load <19 x i16>, <19 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %54 = load <20 x i16>, <20 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %55 = load <21 x i16>, <21 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %56 = load <22 x i16>, <22 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %57 = load <23 x i16>, <23 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %58 = load <24 x i16>, <24 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %59 = load <25 x i16>, <25 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %60 = load <26 x i16>, <26 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %61 = load <27 x i16>, <27 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %62 = load <28 x i16>, <28 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %63 = load <29 x i16>, <29 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %64 = load <30 x i16>, <30 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %65 = load <31 x i16>, <31 x i16>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, <1 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, <2 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, <3 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, <4 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %70 = load <5 x i8>, <5 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %71 = load <6 x i8>, <6 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %72 = load <7 x i8>, <7 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, <8 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %74 = load <9 x i8>, <9 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %75 = load <10 x i8>, <10 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %76 = load <11 x i8>, <11 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %77 = load <12 x i8>, <12 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %78 = load <13 x i8>, <13 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %79 = load <14 x i8>, <14 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %80 = load <15 x i8>, <15 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %81 = load <17 x i8>, <17 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %82 = load <18 x i8>, <18 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %83 = load <19 x i8>, <19 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %84 = load <20 x i8>, <20 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %85 = load <21 x i8>, <21 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %86 = load <22 x i8>, <22 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %87 = load <23 x i8>, <23 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %88 = load <24 x i8>, <24 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %89 = load <25 x i8>, <25 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %90 = load <26 x i8>, <26 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %91 = load <27 x i8>, <27 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %92 = load <28 x i8>, <28 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %93 = load <29 x i8>, <29 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %94 = load <30 x i8>, <30 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %95 = load <31 x i8>, <31 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %96 = load <33 x i8>, <33 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %97 = load <34 x i8>, <34 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %98 = load <35 x i8>, <35 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %99 = load <36 x i8>, <36 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %100 = load <37 x i8>, <37 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %101 = load <38 x i8>, <38 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %102 = load <39 x i8>, <39 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %103 = load <40 x i8>, <40 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %104 = load <41 x i8>, <41 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %105 = load <42 x i8>, <42 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %106 = load <43 x i8>, <43 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %107 = load <44 x i8>, <44 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %108 = load <45 x i8>, <45 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %109 = load <46 x i8>, <46 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %110 = load <47 x i8>, <47 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %111 = load <48 x i8>, <48 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %112 = load <49 x i8>, <49 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %113 = load <50 x i8>, <50 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %114 = load <51 x i8>, <51 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %115 = load <52 x i8>, <52 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %116 = load <53 x i8>, <53 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %117 = load <54 x i8>, <54 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %118 = load <55 x i8>, <55 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %119 = load <56 x i8>, <56 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %120 = load <57 x i8>, <57 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %121 = load <58 x i8>, <58 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %122 = load <59 x i8>, <59 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %123 = load <60 x i8>, <60 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %124 = load <61 x i8>, <61 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %125 = load <62 x i8>, <62 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %126 = load <63 x i8>, <63 x i8>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, <3 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, <5 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, <6 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, <7 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, <9 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, <10 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, <11 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, <12 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, <13 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, <14 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, <15 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, <17 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, <18 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, <19 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, <20 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, <21 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, <22 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, <23 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, <24 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, <25 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, <26 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, <27 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, <28 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, <29 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, <30 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, <31 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %153 = load <33 x i1>, <33 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %154 = load <34 x i1>, <34 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %155 = load <35 x i1>, <35 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %156 = load <36 x i1>, <36 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %157 = load <37 x i1>, <37 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %158 = load <38 x i1>, <38 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %159 = load <39 x i1>, <39 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %160 = load <40 x i1>, <40 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %161 = load <41 x i1>, <41 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %162 = load <42 x i1>, <42 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %163 = load <43 x i1>, <43 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %164 = load <44 x i1>, <44 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %165 = load <45 x i1>, <45 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %166 = load <46 x i1>, <46 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %167 = load <47 x i1>, <47 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %168 = load <48 x i1>, <48 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %169 = load <49 x i1>, <49 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %170 = load <50 x i1>, <50 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %171 = load <51 x i1>, <51 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %172 = load <52 x i1>, <52 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %173 = load <53 x i1>, <53 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %174 = load <54 x i1>, <54 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %175 = load <55 x i1>, <55 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %176 = load <56 x i1>, <56 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %177 = load <57 x i1>, <57 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %178 = load <58 x i1>, <58 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %179 = load <59 x i1>, <59 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %180 = load <60 x i1>, <60 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %181 = load <61 x i1>, <61 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %182 = load <62 x i1>, <62 x i1>* undef, align 64
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %183 = load <63 x i1>, <63 x i1>* undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <1 x i64>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = load <3 x i64>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = load <5 x i64>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = load <6 x i64>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = load <7 x i64>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load <1 x double>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load <3 x double>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = load <5 x double>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = load <6 x double>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = load <7 x double>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = load <1 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = load <2 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = load <3 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = load <5 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = load <6 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = load <7 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = load <9 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = load <10 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = load <11 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = load <12 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <13 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <14 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = load <15 x i32>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = load <1 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = load <2 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = load <3 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = load <5 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = load <6 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = load <7 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = load <9 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = load <10 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = load <11 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = load <12 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = load <13 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = load <14 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = load <15 x float>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = load <1 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = load <2 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = load <3 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %40 = load <4 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %41 = load <5 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %42 = load <6 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %43 = load <7 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %44 = load <9 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = load <10 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = load <11 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %47 = load <12 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = load <13 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %49 = load <14 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %50 = load <15 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %51 = load <17 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %52 = load <18 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %53 = load <19 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %54 = load <20 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %55 = load <21 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %56 = load <22 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %57 = load <23 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %58 = load <24 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %59 = load <25 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %60 = load <26 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %61 = load <27 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %62 = load <28 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %63 = load <29 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %64 = load <30 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %65 = load <31 x i16>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %66 = load <1 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %67 = load <2 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %68 = load <3 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %69 = load <4 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %70 = load <5 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %71 = load <6 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %72 = load <7 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = load <8 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %74 = load <9 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %75 = load <10 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %76 = load <11 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %77 = load <12 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %78 = load <13 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %79 = load <14 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %80 = load <15 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %81 = load <17 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %82 = load <18 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %83 = load <19 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %84 = load <20 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %85 = load <21 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %86 = load <22 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %87 = load <23 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %88 = load <24 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %89 = load <25 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %90 = load <26 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %91 = load <27 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %92 = load <28 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %93 = load <29 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %94 = load <30 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %95 = load <31 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %96 = load <33 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %97 = load <34 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %98 = load <35 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %99 = load <36 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %100 = load <37 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %101 = load <38 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %102 = load <39 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %103 = load <40 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %104 = load <41 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %105 = load <42 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %106 = load <43 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %107 = load <44 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %108 = load <45 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %109 = load <46 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %110 = load <47 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %111 = load <48 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %112 = load <49 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %113 = load <50 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %114 = load <51 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %115 = load <52 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %116 = load <53 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %117 = load <54 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %118 = load <55 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %119 = load <56 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %120 = load <57 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %121 = load <58 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %122 = load <59 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %123 = load <60 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %124 = load <61 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %125 = load <62 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %126 = load <63 x i8>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %127 = load <3 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %128 = load <5 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %129 = load <6 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %130 = load <7 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %131 = load <9 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %132 = load <10 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %133 = load <11 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %134 = load <12 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %135 = load <13 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %136 = load <14 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %137 = load <15 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %138 = load <17 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %139 = load <18 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %140 = load <19 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %141 = load <20 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %142 = load <21 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %143 = load <22 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %144 = load <23 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %145 = load <24 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %146 = load <25 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %147 = load <26 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %148 = load <27 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %149 = load <28 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %150 = load <29 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %151 = load <30 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %152 = load <31 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %153 = load <33 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %154 = load <34 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %155 = load <35 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %156 = load <36 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %157 = load <37 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %158 = load <38 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %159 = load <39 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %160 = load <40 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %161 = load <41 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %162 = load <42 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %163 = load <43 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %164 = load <44 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %165 = load <45 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %166 = load <46 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %167 = load <47 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %168 = load <48 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %169 = load <49 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %170 = load <50 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %171 = load <51 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %172 = load <52 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %173 = load <53 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %174 = load <54 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %175 = load <55 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %176 = load <56 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %177 = load <57 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %178 = load <58 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %179 = load <59 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %180 = load <60 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %181 = load <61 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %182 = load <62 x i1>, ptr undef, align 64
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %183 = load <63 x i1>, ptr undef, align 64
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
- load <1 x i64>, <1 x i64>* undef, align 64
+ load <1 x i64>, ptr undef, align 64
; <2 x i64> is XMM
- load <3 x i64>, <3 x i64>* undef, align 64
+ load <3 x i64>, ptr undef, align 64
; <4 x i64> is YMM
- load <5 x i64>, <5 x i64>* undef, align 64
- load <6 x i64>, <6 x i64>* undef, align 64
- load <7 x i64>, <7 x i64>* undef, align 64
+ load <5 x i64>, ptr undef, align 64
+ load <6 x i64>, ptr undef, align 64
+ load <7 x i64>, ptr undef, align 64
; <8 x i64> is ZMM
; Partial vectors with double elements
- load <1 x double>, <1 x double>* undef, align 64
+ load <1 x double>, ptr undef, align 64
; <2 x double> is XMM
- load <3 x double>, <3 x double>* undef, align 64
+ load <3 x double>, ptr undef, align 64
; <4 x double> is YMM
- load <5 x double>, <5 x double>* undef, align 64
- load <6 x double>, <6 x double>* undef, align 64
- load <7 x double>, <7 x double>* undef, align 64
+ load <5 x double>, ptr undef, align 64
+ load <6 x double>, ptr undef, align 64
+ load <7 x double>, ptr undef, align 64
; <8 x double> is ZMM
; Partial vectors with i32 elements
- load <1 x i32>, <1 x i32>* undef, align 64
- load <2 x i32>, <2 x i32>* undef, align 64
- load <3 x i32>, <3 x i32>* undef, align 64
+ load <1 x i32>, ptr undef, align 64
+ load <2 x i32>, ptr undef, align 64
+ load <3 x i32>, ptr undef, align 64
; <4 x i32> is XMM
- load <5 x i32>, <5 x i32>* undef, align 64
- load <6 x i32>, <6 x i32>* undef, align 64
- load <7 x i32>, <7 x i32>* undef, align 64
+ load <5 x i32>, ptr undef, align 64
+ load <6 x i32>, ptr undef, align 64
+ load <7 x i32>, ptr undef, align 64
; <8 x i32> is YMM
- load <9 x i32>, <9 x i32>* undef, align 64
- load <10 x i32>, <10 x i32>* undef, align 64
- load <11 x i32>, <11 x i32>* undef, align 64
- load <12 x i32>, <12 x i32>* undef, align 64
- load <13 x i32>, <13 x i32>* undef, align 64
- load <14 x i32>, <14 x i32>* undef, align 64
- load <15 x i32>, <15 x i32>* undef, align 64
+ load <9 x i32>, ptr undef, align 64
+ load <10 x i32>, ptr undef, align 64
+ load <11 x i32>, ptr undef, align 64
+ load <12 x i32>, ptr undef, align 64
+ load <13 x i32>, ptr undef, align 64
+ load <14 x i32>, ptr undef, align 64
+ load <15 x i32>, ptr undef, align 64
; <16 x i32> is ZMM
; Partial vectors with float elements
- load <1 x float>, <1 x float>* undef, align 64
- load <2 x float>, <2 x float>* undef, align 64
- load <3 x float>, <3 x float>* undef, align 64
+ load <1 x float>, ptr undef, align 64
+ load <2 x float>, ptr undef, align 64
+ load <3 x float>, ptr undef, align 64
; <4 x float> is XMM
- load <5 x float>, <5 x float>* undef, align 64
- load <6 x float>, <6 x float>* undef, align 64
- load <7 x float>, <7 x float>* undef, align 64
+ load <5 x float>, ptr undef, align 64
+ load <6 x float>, ptr undef, align 64
+ load <7 x float>, ptr undef, align 64
; <8 x float> is YMM
- load <9 x float>, <9 x float>* undef, align 64
- load <10 x float>, <10 x float>* undef, align 64
- load <11 x float>, <11 x float>* undef, align 64
- load <12 x float>, <12 x float>* undef, align 64
- load <13 x float>, <13 x float>* undef, align 64
- load <14 x float>, <14 x float>* undef, align 64
- load <15 x float>, <15 x float>* undef, align 64
+ load <9 x float>, ptr undef, align 64
+ load <10 x float>, ptr undef, align 64
+ load <11 x float>, ptr undef, align 64
+ load <12 x float>, ptr undef, align 64
+ load <13 x float>, ptr undef, align 64
+ load <14 x float>, ptr undef, align 64
+ load <15 x float>, ptr undef, align 64
; <16 x float> is ZMM
; Partial vectors with i16 elements
- load <1 x i16>, <1 x i16>* undef, align 64
- load <2 x i16>, <2 x i16>* undef, align 64
- load <3 x i16>, <3 x i16>* undef, align 64
- load <4 x i16>, <4 x i16>* undef, align 64
- load <5 x i16>, <5 x i16>* undef, align 64
- load <6 x i16>, <6 x i16>* undef, align 64
- load <7 x i16>, <7 x i16>* undef, align 64
+ load <1 x i16>, ptr undef, align 64
+ load <2 x i16>, ptr undef, align 64
+ load <3 x i16>, ptr undef, align 64
+ load <4 x i16>, ptr undef, align 64
+ load <5 x i16>, ptr undef, align 64
+ load <6 x i16>, ptr undef, align 64
+ load <7 x i16>, ptr undef, align 64
; <8 x i16> is XMM
- load <9 x i16>, <9 x i16>* undef, align 64
- load <10 x i16>, <10 x i16>* undef, align 64
- load <11 x i16>, <11 x i16>* undef, align 64
- load <12 x i16>, <12 x i16>* undef, align 64
- load <13 x i16>, <13 x i16>* undef, align 64
- load <14 x i16>, <14 x i16>* undef, align 64
- load <15 x i16>, <15 x i16>* undef, align 64
+ load <9 x i16>, ptr undef, align 64
+ load <10 x i16>, ptr undef, align 64
+ load <11 x i16>, ptr undef, align 64
+ load <12 x i16>, ptr undef, align 64
+ load <13 x i16>, ptr undef, align 64
+ load <14 x i16>, ptr undef, align 64
+ load <15 x i16>, ptr undef, align 64
; <16 x i16> is YMM
- load <17 x i16>, <17 x i16>* undef, align 64
- load <18 x i16>, <18 x i16>* undef, align 64
- load <19 x i16>, <19 x i16>* undef, align 64
- load <20 x i16>, <20 x i16>* undef, align 64
- load <21 x i16>, <21 x i16>* undef, align 64
- load <22 x i16>, <22 x i16>* undef, align 64
- load <23 x i16>, <23 x i16>* undef, align 64
- load <24 x i16>, <24 x i16>* undef, align 64
- load <25 x i16>, <25 x i16>* undef, align 64
- load <26 x i16>, <26 x i16>* undef, align 64
- load <27 x i16>, <27 x i16>* undef, align 64
- load <28 x i16>, <28 x i16>* undef, align 64
- load <29 x i16>, <29 x i16>* undef, align 64
- load <30 x i16>, <30 x i16>* undef, align 64
- load <31 x i16>, <31 x i16>* undef, align 64
+ load <17 x i16>, ptr undef, align 64
+ load <18 x i16>, ptr undef, align 64
+ load <19 x i16>, ptr undef, align 64
+ load <20 x i16>, ptr undef, align 64
+ load <21 x i16>, ptr undef, align 64
+ load <22 x i16>, ptr undef, align 64
+ load <23 x i16>, ptr undef, align 64
+ load <24 x i16>, ptr undef, align 64
+ load <25 x i16>, ptr undef, align 64
+ load <26 x i16>, ptr undef, align 64
+ load <27 x i16>, ptr undef, align 64
+ load <28 x i16>, ptr undef, align 64
+ load <29 x i16>, ptr undef, align 64
+ load <30 x i16>, ptr undef, align 64
+ load <31 x i16>, ptr undef, align 64
; <32 x i16> is ZMM
; Partial vectors with i8 elements
- load <1 x i8>, <1 x i8>* undef, align 64
- load <2 x i8>, <2 x i8>* undef, align 64
- load <3 x i8>, <3 x i8>* undef, align 64
- load <4 x i8>, <4 x i8>* undef, align 64
- load <5 x i8>, <5 x i8>* undef, align 64
- load <6 x i8>, <6 x i8>* undef, align 64
- load <7 x i8>, <7 x i8>* undef, align 64
- load <8 x i8>, <8 x i8>* undef, align 64
- load <9 x i8>, <9 x i8>* undef, align 64
- load <10 x i8>, <10 x i8>* undef, align 64
- load <11 x i8>, <11 x i8>* undef, align 64
- load <12 x i8>, <12 x i8>* undef, align 64
- load <13 x i8>, <13 x i8>* undef, align 64
- load <14 x i8>, <14 x i8>* undef, align 64
- load <15 x i8>, <15 x i8>* undef, align 64
+ load <1 x i8>, ptr undef, align 64
+ load <2 x i8>, ptr undef, align 64
+ load <3 x i8>, ptr undef, align 64
+ load <4 x i8>, ptr undef, align 64
+ load <5 x i8>, ptr undef, align 64
+ load <6 x i8>, ptr undef, align 64
+ load <7 x i8>, ptr undef, align 64
+ load <8 x i8>, ptr undef, align 64
+ load <9 x i8>, ptr undef, align 64
+ load <10 x i8>, ptr undef, align 64
+ load <11 x i8>, ptr undef, align 64
+ load <12 x i8>, ptr undef, align 64
+ load <13 x i8>, ptr undef, align 64
+ load <14 x i8>, ptr undef, align 64
+ load <15 x i8>, ptr undef, align 64
; <16 x i8> is XMM
- load <17 x i8>, <17 x i8>* undef, align 64
- load <18 x i8>, <18 x i8>* undef, align 64
- load <19 x i8>, <19 x i8>* undef, align 64
- load <20 x i8>, <20 x i8>* undef, align 64
- load <21 x i8>, <21 x i8>* undef, align 64
- load <22 x i8>, <22 x i8>* undef, align 64
- load <23 x i8>, <23 x i8>* undef, align 64
- load <24 x i8>, <24 x i8>* undef, align 64
- load <25 x i8>, <25 x i8>* undef, align 64
- load <26 x i8>, <26 x i8>* undef, align 64
- load <27 x i8>, <27 x i8>* undef, align 64
- load <28 x i8>, <28 x i8>* undef, align 64
- load <29 x i8>, <29 x i8>* undef, align 64
- load <30 x i8>, <30 x i8>* undef, align 64
- load <31 x i8>, <31 x i8>* undef, align 64
+ load <17 x i8>, ptr undef, align 64
+ load <18 x i8>, ptr undef, align 64
+ load <19 x i8>, ptr undef, align 64
+ load <20 x i8>, ptr undef, align 64
+ load <21 x i8>, ptr undef, align 64
+ load <22 x i8>, ptr undef, align 64
+ load <23 x i8>, ptr undef, align 64
+ load <24 x i8>, ptr undef, align 64
+ load <25 x i8>, ptr undef, align 64
+ load <26 x i8>, ptr undef, align 64
+ load <27 x i8>, ptr undef, align 64
+ load <28 x i8>, ptr undef, align 64
+ load <29 x i8>, ptr undef, align 64
+ load <30 x i8>, ptr undef, align 64
+ load <31 x i8>, ptr undef, align 64
; <32 x i8> is YMM
- load <33 x i8>, <33 x i8>* undef, align 64
- load <34 x i8>, <34 x i8>* undef, align 64
- load <35 x i8>, <35 x i8>* undef, align 64
- load <36 x i8>, <36 x i8>* undef, align 64
- load <37 x i8>, <37 x i8>* undef, align 64
- load <38 x i8>, <38 x i8>* undef, align 64
- load <39 x i8>, <39 x i8>* undef, align 64
- load <40 x i8>, <40 x i8>* undef, align 64
- load <41 x i8>, <41 x i8>* undef, align 64
- load <42 x i8>, <42 x i8>* undef, align 64
- load <43 x i8>, <43 x i8>* undef, align 64
- load <44 x i8>, <44 x i8>* undef, align 64
- load <45 x i8>, <45 x i8>* undef, align 64
- load <46 x i8>, <46 x i8>* undef, align 64
- load <47 x i8>, <47 x i8>* undef, align 64
- load <48 x i8>, <48 x i8>* undef, align 64
- load <49 x i8>, <49 x i8>* undef, align 64
- load <50 x i8>, <50 x i8>* undef, align 64
- load <51 x i8>, <51 x i8>* undef, align 64
- load <52 x i8>, <52 x i8>* undef, align 64
- load <53 x i8>, <53 x i8>* undef, align 64
- load <54 x i8>, <54 x i8>* undef, align 64
- load <55 x i8>, <55 x i8>* undef, align 64
- load <56 x i8>, <56 x i8>* undef, align 64
- load <57 x i8>, <57 x i8>* undef, align 64
- load <58 x i8>, <58 x i8>* undef, align 64
- load <59 x i8>, <59 x i8>* undef, align 64
- load <60 x i8>, <60 x i8>* undef, align 64
- load <61 x i8>, <61 x i8>* undef, align 64
- load <62 x i8>, <62 x i8>* undef, align 64
- load <63 x i8>, <63 x i8>* undef, align 64
+ load <33 x i8>, ptr undef, align 64
+ load <34 x i8>, ptr undef, align 64
+ load <35 x i8>, ptr undef, align 64
+ load <36 x i8>, ptr undef, align 64
+ load <37 x i8>, ptr undef, align 64
+ load <38 x i8>, ptr undef, align 64
+ load <39 x i8>, ptr undef, align 64
+ load <40 x i8>, ptr undef, align 64
+ load <41 x i8>, ptr undef, align 64
+ load <42 x i8>, ptr undef, align 64
+ load <43 x i8>, ptr undef, align 64
+ load <44 x i8>, ptr undef, align 64
+ load <45 x i8>, ptr undef, align 64
+ load <46 x i8>, ptr undef, align 64
+ load <47 x i8>, ptr undef, align 64
+ load <48 x i8>, ptr undef, align 64
+ load <49 x i8>, ptr undef, align 64
+ load <50 x i8>, ptr undef, align 64
+ load <51 x i8>, ptr undef, align 64
+ load <52 x i8>, ptr undef, align 64
+ load <53 x i8>, ptr undef, align 64
+ load <54 x i8>, ptr undef, align 64
+ load <55 x i8>, ptr undef, align 64
+ load <56 x i8>, ptr undef, align 64
+ load <57 x i8>, ptr undef, align 64
+ load <58 x i8>, ptr undef, align 64
+ load <59 x i8>, ptr undef, align 64
+ load <60 x i8>, ptr undef, align 64
+ load <61 x i8>, ptr undef, align 64
+ load <62 x i8>, ptr undef, align 64
+ load <63 x i8>, ptr undef, align 64
; <64 x i8> is ZMM
; Partial vectors with i1 elements
; <1 x i1> is XMM
; <2 x i1> is XMM
- load <3 x i1>, <3 x i1>* undef, align 64
+ load <3 x i1>, ptr undef, align 64
; <4 x i1> is XMM
- load <5 x i1>, <5 x i1>* undef, align 64
- load <6 x i1>, <6 x i1>* undef, align 64
- load <7 x i1>, <7 x i1>* undef, align 64
+ load <5 x i1>, ptr undef, align 64
+ load <6 x i1>, ptr undef, align 64
+ load <7 x i1>, ptr undef, align 64
; <8 x i1> is XMM
- load <9 x i1>, <9 x i1>* undef, align 64
- load <10 x i1>, <10 x i1>* undef, align 64
- load <11 x i1>, <11 x i1>* undef, align 64
- load <12 x i1>, <12 x i1>* undef, align 64
- load <13 x i1>, <13 x i1>* undef, align 64
- load <14 x i1>, <14 x i1>* undef, align 64
- load <15 x i1>, <15 x i1>* undef, align 64
+ load <9 x i1>, ptr undef, align 64
+ load <10 x i1>, ptr undef, align 64
+ load <11 x i1>, ptr undef, align 64
+ load <12 x i1>, ptr undef, align 64
+ load <13 x i1>, ptr undef, align 64
+ load <14 x i1>, ptr undef, align 64
+ load <15 x i1>, ptr undef, align 64
; <16 x i1> is XMM
- load <17 x i1>, <17 x i1>* undef, align 64
- load <18 x i1>, <18 x i1>* undef, align 64
- load <19 x i1>, <19 x i1>* undef, align 64
- load <20 x i1>, <20 x i1>* undef, align 64
- load <21 x i1>, <21 x i1>* undef, align 64
- load <22 x i1>, <22 x i1>* undef, align 64
- load <23 x i1>, <23 x i1>* undef, align 64
- load <24 x i1>, <24 x i1>* undef, align 64
- load <25 x i1>, <25 x i1>* undef, align 64
- load <26 x i1>, <26 x i1>* undef, align 64
- load <27 x i1>, <27 x i1>* undef, align 64
- load <28 x i1>, <28 x i1>* undef, align 64
- load <29 x i1>, <29 x i1>* undef, align 64
- load <30 x i1>, <30 x i1>* undef, align 64
- load <31 x i1>, <31 x i1>* undef, align 64
+ load <17 x i1>, ptr undef, align 64
+ load <18 x i1>, ptr undef, align 64
+ load <19 x i1>, ptr undef, align 64
+ load <20 x i1>, ptr undef, align 64
+ load <21 x i1>, ptr undef, align 64
+ load <22 x i1>, ptr undef, align 64
+ load <23 x i1>, ptr undef, align 64
+ load <24 x i1>, ptr undef, align 64
+ load <25 x i1>, ptr undef, align 64
+ load <26 x i1>, ptr undef, align 64
+ load <27 x i1>, ptr undef, align 64
+ load <28 x i1>, ptr undef, align 64
+ load <29 x i1>, ptr undef, align 64
+ load <30 x i1>, ptr undef, align 64
+ load <31 x i1>, ptr undef, align 64
; <32 x i1> is YMM
- load <33 x i1>, <33 x i1>* undef, align 64
- load <34 x i1>, <34 x i1>* undef, align 64
- load <35 x i1>, <35 x i1>* undef, align 64
- load <36 x i1>, <36 x i1>* undef, align 64
- load <37 x i1>, <37 x i1>* undef, align 64
- load <38 x i1>, <38 x i1>* undef, align 64
- load <39 x i1>, <39 x i1>* undef, align 64
- load <40 x i1>, <40 x i1>* undef, align 64
- load <41 x i1>, <41 x i1>* undef, align 64
- load <42 x i1>, <42 x i1>* undef, align 64
- load <43 x i1>, <43 x i1>* undef, align 64
- load <44 x i1>, <44 x i1>* undef, align 64
- load <45 x i1>, <45 x i1>* undef, align 64
- load <46 x i1>, <46 x i1>* undef, align 64
- load <47 x i1>, <47 x i1>* undef, align 64
- load <48 x i1>, <48 x i1>* undef, align 64
- load <49 x i1>, <49 x i1>* undef, align 64
- load <50 x i1>, <50 x i1>* undef, align 64
- load <51 x i1>, <51 x i1>* undef, align 64
- load <52 x i1>, <52 x i1>* undef, align 64
- load <53 x i1>, <53 x i1>* undef, align 64
- load <54 x i1>, <54 x i1>* undef, align 64
- load <55 x i1>, <55 x i1>* undef, align 64
- load <56 x i1>, <56 x i1>* undef, align 64
- load <57 x i1>, <57 x i1>* undef, align 64
- load <58 x i1>, <58 x i1>* undef, align 64
- load <59 x i1>, <59 x i1>* undef, align 64
- load <60 x i1>, <60 x i1>* undef, align 64
- load <61 x i1>, <61 x i1>* undef, align 64
- load <62 x i1>, <62 x i1>* undef, align 64
- load <63 x i1>, <63 x i1>* undef, align 64
+ load <33 x i1>, ptr undef, align 64
+ load <34 x i1>, ptr undef, align 64
+ load <35 x i1>, ptr undef, align 64
+ load <36 x i1>, ptr undef, align 64
+ load <37 x i1>, ptr undef, align 64
+ load <38 x i1>, ptr undef, align 64
+ load <39 x i1>, ptr undef, align 64
+ load <40 x i1>, ptr undef, align 64
+ load <41 x i1>, ptr undef, align 64
+ load <42 x i1>, ptr undef, align 64
+ load <43 x i1>, ptr undef, align 64
+ load <44 x i1>, ptr undef, align 64
+ load <45 x i1>, ptr undef, align 64
+ load <46 x i1>, ptr undef, align 64
+ load <47 x i1>, ptr undef, align 64
+ load <48 x i1>, ptr undef, align 64
+ load <49 x i1>, ptr undef, align 64
+ load <50 x i1>, ptr undef, align 64
+ load <51 x i1>, ptr undef, align 64
+ load <52 x i1>, ptr undef, align 64
+ load <53 x i1>, ptr undef, align 64
+ load <54 x i1>, ptr undef, align 64
+ load <55 x i1>, ptr undef, align 64
+ load <56 x i1>, ptr undef, align 64
+ load <57 x i1>, ptr undef, align 64
+ load <58 x i1>, ptr undef, align 64
+ load <59 x i1>, ptr undef, align 64
+ load <60 x i1>, ptr undef, align 64
+ load <61 x i1>, ptr undef, align 64
+ load <62 x i1>, ptr undef, align 64
+ load <63 x i1>, ptr undef, align 64
; <64 x i1> is ZMM
ret i32 undef
define void @test() {
; SSE-LABEL: 'test'
-; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; SSE: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; SSE: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; SSE: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; SSE: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i32, i32* %inB, align 4
+; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; SSE: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; SSE: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; SSE: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; SSE: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i32, ptr %inB, align 4
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i32, i32* %inB, align 4
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i32, ptr %inB, align 4
;
; AVX2-SLOWGATHER-LABEL: 'test'
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i32, i32* %inB, align 4
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i32, ptr %inB, align 4
;
; AVX2-FASTGATHER-LABEL: 'test'
-; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2-FASTGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 4 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2-FASTGATHER: LV: Found an estimated cost of 12 for VF 8 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2-FASTGATHER: LV: Found an estimated cost of 24 for VF 16 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2-FASTGATHER: LV: Found an estimated cost of 48 for VF 32 For instruction: %valB.loaded = load i32, i32* %inB, align 4
+; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2-FASTGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 4 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2-FASTGATHER: LV: Found an estimated cost of 12 for VF 8 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2-FASTGATHER: LV: Found an estimated cost of 24 for VF 16 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2-FASTGATHER: LV: Found an estimated cost of 48 for VF 32 For instruction: %valB.loaded = load i32, ptr %inB, align 4
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 21 for VF 4 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 72 for VF 64 For instruction: %valB.loaded = load i32, i32* %inB, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 21 for VF 4 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 72 for VF 64 For instruction: %valB.loaded = load i32, ptr %inB, align 4
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%canLoad = icmp ne i8 %valA, 0
br i1 %canLoad, label %load, label %mask
load:
%valA.ext = sext i8 %valA to i64
- %inB = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %valA.ext
- %valB.loaded = load i32, i32* %inB
+ %inB = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %valA.ext
+ %valB.loaded = load i32, ptr %inB
br label %end
mask:
end:
%valB = phi i32 [ %valB.loaded, %load ], [ 0, %mask ]
- %out = getelementptr inbounds [1024 x i32], [1024 x i32]* @C, i64 0, i64 %iv
- store i32 %valB, i32* %out
+ %out = getelementptr inbounds [1024 x i32], ptr @C, i64 0, i64 %iv
+ store i32 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
define void @test() {
; SSE-LABEL: 'test'
-; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; SSE: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; SSE: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; SSE: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; SSE: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
+; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; SSE: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; SSE: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; SSE: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; SSE: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i64, ptr %inB, align 8
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i64, ptr %inB, align 8
;
; AVX2-SLOWGATHER-LABEL: 'test'
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2-SLOWGATHER: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i64, ptr %inB, align 8
;
; AVX2-FASTGATHER-LABEL: 'test'
-; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2-FASTGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2-FASTGATHER: LV: Found an estimated cost of 12 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2-FASTGATHER: LV: Found an estimated cost of 24 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2-FASTGATHER: LV: Found an estimated cost of 48 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8
+; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2-FASTGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 4 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2-FASTGATHER: LV: Found an estimated cost of 12 for VF 8 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2-FASTGATHER: LV: Found an estimated cost of 24 for VF 16 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2-FASTGATHER: LV: Found an estimated cost of 48 for VF 32 For instruction: %valB.loaded = load i64, ptr %inB, align 8
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 22 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %valB.loaded = load i64, i64* %inB, align 8
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 22 for VF 4 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %valB.loaded = load i64, ptr %inB, align 8
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%canLoad = icmp ne i8 %valA, 0
br i1 %canLoad, label %load, label %mask
load:
%valA.ext = sext i8 %valA to i64
- %inB = getelementptr inbounds [1024 x i64], [1024 x i64]* @B, i64 0, i64 %valA.ext
- %valB.loaded = load i64, i64* %inB
+ %inB = getelementptr inbounds [1024 x i64], ptr @B, i64 0, i64 %valA.ext
+ %valB.loaded = load i64, ptr %inB
br label %end
mask:
end:
%valB = phi i64 [ %valB.loaded, %load ], [ 0, %mask ]
- %out = getelementptr inbounds [1024 x i64], [1024 x i64]* @C, i64 0, i64 %iv
- store i64 %valB, i64* %out
+ %out = getelementptr inbounds [1024 x i64], ptr @C, i64 0, i64 %iv
+ store i64 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
; }
; (relates to the testcase in PR50566)
-define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) {
+define void @test1(ptr noalias nocapture %points, ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y) {
; DISABLED_MASKED_STRIDED-LABEL: 'test1'
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED-LABEL: 'test1'
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 2 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 4 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 8 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 17 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 17 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
;
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%i1 = shl nuw nsw i64 %indvars.iv, 2
- %arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %i1
- %i2 = load i16, i16* %arrayidx2, align 2
+ %arrayidx2 = getelementptr inbounds i16, ptr %points, i64 %i1
+ %i2 = load i16, ptr %arrayidx2, align 2
%i3 = or i64 %i1, 1
- %arrayidx7 = getelementptr inbounds i16, i16* %points, i64 %i3
- %i4 = load i16, i16* %arrayidx7, align 2
- %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
- store i16 %i2, i16* %arrayidx, align 2
- %arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv
- store i16 %i4, i16* %arrayidx4, align 2
+ %arrayidx7 = getelementptr inbounds i16, ptr %points, i64 %i3
+ %i4 = load i16, ptr %arrayidx7, align 2
+ %arrayidx = getelementptr inbounds i16, ptr %x, i64 %indvars.iv
+ store i16 %i2, ptr %arrayidx, align 2
+ %arrayidx4 = getelementptr inbounds i16, ptr %y, i64 %indvars.iv
+ store i16 %i4, ptr %arrayidx4, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body
; y[i] = points[i*4 + 1];
; }
-define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) {
+define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y) {
; DISABLED_MASKED_STRIDED-LABEL: 'test2'
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED-LABEL: 'test2'
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 2 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 4 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 8 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 17 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 17 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
;
entry:
%cmp15 = icmp sgt i32 %numPoints, 0
for.body:
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%i1 = shl nuw nsw i64 %indvars.iv, 2
- %arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %i1
- %i2 = load i16, i16* %arrayidx2, align 2
+ %arrayidx2 = getelementptr inbounds i16, ptr %points, i64 %i1
+ %i2 = load i16, ptr %arrayidx2, align 2
%i3 = or i64 %i1, 1
- %arrayidx7 = getelementptr inbounds i16, i16* %points, i64 %i3
- %i4 = load i16, i16* %arrayidx7, align 2
- %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
- store i16 %i2, i16* %arrayidx, align 2
- %arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv
- store i16 %i4, i16* %arrayidx4, align 2
+ %arrayidx7 = getelementptr inbounds i16, ptr %points, i64 %i3
+ %i4 = load i16, ptr %arrayidx7, align 2
+ %arrayidx = getelementptr inbounds i16, ptr %x, i64 %indvars.iv
+ store i16 %i2, ptr %arrayidx, align 2
+ %arrayidx4 = getelementptr inbounds i16, ptr %y, i64 %indvars.iv
+ store i16 %i4, ptr %arrayidx4, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.end.loopexit, label %for.body
; x[i] = points[i*3];
; }
-define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) {
+define void @test(ptr noalias nocapture %points, ptr noalias nocapture readonly %x, ptr noalias nocapture readnone %y) {
; DISABLED_MASKED_STRIDED-LABEL: 'test'
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 2 For instruction: %i2 = load i16, i16* %arrayidx, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 4 For instruction: %i2 = load i16, i16* %arrayidx, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 8 For instruction: %i2 = load i16, i16* %arrayidx, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
;
; ENABLED_MASKED_STRIDED-LABEL: 'test'
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 2 For instruction: %i2 = load i16, i16* %arrayidx, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 7 for VF 2 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 4 For instruction: %i2 = load i16, i16* %arrayidx, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 9 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 8 For instruction: %i2 = load i16, i16* %arrayidx, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 9 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 7 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 9 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 9 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
;
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
- %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
- %i2 = load i16, i16* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds i16, ptr %x, i64 %indvars.iv
+ %i2 = load i16, ptr %arrayidx, align 2
%cmp1 = icmp sgt i16 %i2, 0
br i1 %cmp1, label %if.then, label %for.inc
if.then:
%i1 = mul nuw nsw i64 %indvars.iv, 3
- %arrayidx6 = getelementptr inbounds i16, i16* %points, i64 %i1
- %i4 = load i16, i16* %arrayidx6, align 2
- store i16 %i4, i16* %arrayidx, align 2
+ %arrayidx6 = getelementptr inbounds i16, ptr %points, i64 %i1
+ %i4 = load i16, ptr %arrayidx6, align 2
+ store i16 %i4, ptr %arrayidx, align 2
br label %for.inc
for.inc:
; }
; (relates to the testcase in PR50566)
-define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) {
+define void @test1(ptr noalias nocapture %points, ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y) {
; DISABLED_MASKED_STRIDED-LABEL: 'test1'
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED-LABEL: 'test1'
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 12 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 12 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2
;
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
- %0 = load i16, i16* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds i16, ptr %x, i64 %indvars.iv
+ %0 = load i16, ptr %arrayidx, align 2
%1 = shl nuw nsw i64 %indvars.iv, 2
- %arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %1
- store i16 %0, i16* %arrayidx2, align 2
- %arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv
- %2 = load i16, i16* %arrayidx4, align 2
+ %arrayidx2 = getelementptr inbounds i16, ptr %points, i64 %1
+ store i16 %0, ptr %arrayidx2, align 2
+ %arrayidx4 = getelementptr inbounds i16, ptr %y, i64 %indvars.iv
+ %2 = load i16, ptr %arrayidx4, align 2
%3 = or i64 %1, 1
- %arrayidx7 = getelementptr inbounds i16, i16* %points, i64 %3
- store i16 %2, i16* %arrayidx7, align 2
+ %arrayidx7 = getelementptr inbounds i16, ptr %points, i64 %3
+ store i16 %2, ptr %arrayidx7, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body
; points[i*4 + 1] = y[i];
; }
-define void @test2(i16* noalias nocapture %points, i32 %numPoints, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) {
+define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y) {
; DISABLED_MASKED_STRIDED-LABEL: 'test2'
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 10 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 21 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 43 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 10 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 21 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 43 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2
;
; ENABLED_MASKED_STRIDED-LABEL: 'test2'
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 10 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 10 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2
;
entry:
%cmp15 = icmp sgt i32 %numPoints, 0
for.body:
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
- %0 = load i16, i16* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds i16, ptr %x, i64 %indvars.iv
+ %0 = load i16, ptr %arrayidx, align 2
%1 = shl nsw i64 %indvars.iv, 2
- %arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %1
- store i16 %0, i16* %arrayidx2, align 2
- %arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv
- %2 = load i16, i16* %arrayidx4, align 2
+ %arrayidx2 = getelementptr inbounds i16, ptr %points, i64 %1
+ store i16 %0, ptr %arrayidx2, align 2
+ %arrayidx4 = getelementptr inbounds i16, ptr %y, i64 %indvars.iv
+ %2 = load i16, ptr %arrayidx4, align 2
%3 = or i64 %1, 1
- %arrayidx7 = getelementptr inbounds i16, i16* %points, i64 %3
- store i16 %2, i16* %arrayidx7, align 2
+ %arrayidx7 = getelementptr inbounds i16, ptr %points, i64 %3
+ store i16 %2, ptr %arrayidx7, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.end.loopexit, label %for.body
; points[i*3] = x[i];
; }
-define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) {
+define void @test(ptr noalias nocapture %points, ptr noalias nocapture readonly %x, ptr noalias nocapture readnone %y) {
; DISABLED_MASKED_STRIDED-LABEL: 'test'
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2
-; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, ptr %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, ptr %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, ptr %arrayidx6, align 2
+; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, ptr %arrayidx6, align 2
;
; ENABLED_MASKED_STRIDED-LABEL: 'test'
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, ptr %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, ptr %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, ptr %arrayidx6, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, ptr %arrayidx6, align 2
;
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
- %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv
- %0 = load i16, i16* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds i16, ptr %x, i64 %indvars.iv
+ %0 = load i16, ptr %arrayidx, align 2
%cmp1 = icmp sgt i16 %0, 0
br i1 %cmp1, label %if.then, label %for.inc
if.then:
%1 = mul nuw nsw i64 %indvars.iv, 3
- %arrayidx6 = getelementptr inbounds i16, i16* %points, i64 %1
- store i16 %0, i16* %arrayidx6, align 2
+ %arrayidx6 = getelementptr inbounds i16, ptr %points, i64 %1
+ store i16 %0, ptr %arrayidx6, align 2
br label %for.inc
for.inc:
define i32 @masked_load() {
; SSE2-LABEL: 'masked_load'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>* undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>* undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>* undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>* undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>* undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>* undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>* undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>* undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>* undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>* undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>* undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>* undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>* undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>* undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>* undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>* undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>* undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>* undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>* undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>* undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>* undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>* undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>* undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>* undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>* undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>* undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>* undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>* undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SSE42-LABEL: 'masked_load'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>* undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>* undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>* undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>* undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>* undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>* undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>* undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>* undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>* undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>* undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>* undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>* undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>* undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>* undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>* undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>* undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>* undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>* undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>* undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>* undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>* undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>* undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>* undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>* undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>* undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>* undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>* undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>* undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX-LABEL: 'masked_load'
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>* undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>* undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>* undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>* undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>* undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>* undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>* undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>* undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>* undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>* undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>* undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>* undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>* undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>* undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>* undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>* undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>* undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>* undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>* undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>* undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>* undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>* undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>* undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>* undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>* undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>* undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>* undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>* undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; KNL-LABEL: 'masked_load'
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>* undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>* undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>* undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>* undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>* undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>* undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>* undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>* undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>* undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>* undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>* undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>* undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>* undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>* undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>* undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>* undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>* undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>* undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>* undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>* undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>* undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>* undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>* undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>* undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>* undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>* undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>* undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>* undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKX-LABEL: 'masked_load'
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>* undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>* undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>* undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>* undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>* undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>* undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>* undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>* undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>* undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>* undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>* undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>* undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>* undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>* undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>* undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>* undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>* undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>* undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>* undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>* undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>* undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>* undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>* undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>* undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>* undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>* undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>* undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>* undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
- %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef)
- %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>* undef, i32 1, <6 x i1> undef, <6 x double> undef)
- %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>* undef, i32 1, <5 x i1> undef, <5 x double> undef)
- %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
- %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>* undef, i32 1, <3 x i1> undef, <3 x double> undef)
- %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
- %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-
- %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
- %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>* undef, i32 1, <15 x i1> undef, <15 x float> undef)
- %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>* undef, i32 1, <14 x i1> undef, <14 x float> undef)
- %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>* undef, i32 1, <13 x i1> undef, <13 x float> undef)
- %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>* undef, i32 1, <12 x i1> undef, <12 x float> undef)
- %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>* undef, i32 1, <11 x i1> undef, <11 x float> undef)
- %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>* undef, i32 1, <10 x i1> undef, <10 x float> undef)
- %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>* undef, i32 1, <9 x i1> undef, <9 x float> undef)
- %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
- %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* undef, i32 1, <7 x i1> undef, <7 x float> undef)
- %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>* undef, i32 1, <6 x i1> undef, <6 x float> undef)
- %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>* undef, i32 1, <5 x i1> undef, <5 x float> undef)
- %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
- %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>* undef, i32 1, <3 x i1> undef, <3 x float> undef)
- %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
- %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>* undef, i32 1, <1 x i1> undef, <1 x float> undef)
-
- %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
- %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>* undef, i32 1, <7 x i1> undef, <7 x i64> undef)
- %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>* undef, i32 1, <6 x i1> undef, <6 x i64> undef)
- %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>* undef, i32 1, <5 x i1> undef, <5 x i64> undef)
- %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
- %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>* undef, i32 1, <3 x i1> undef, <3 x i64> undef)
- %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
- %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-
- %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
- %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>* undef, i32 1, <15 x i1> undef, <15 x i32> undef)
- %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>* undef, i32 1, <14 x i1> undef, <14 x i32> undef)
- %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>* undef, i32 1, <13 x i1> undef, <13 x i32> undef)
- %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>* undef, i32 1, <12 x i1> undef, <12 x i32> undef)
- %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>* undef, i32 1, <11 x i1> undef, <11 x i32> undef)
- %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>* undef, i32 1, <10 x i1> undef, <10 x i32> undef)
- %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>* undef, i32 1, <9 x i1> undef, <9 x i32> undef)
- %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
- %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>* undef, i32 1, <7 x i1> undef, <7 x i32> undef)
- %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>* undef, i32 1, <6 x i1> undef, <6 x i32> undef)
- %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>* undef, i32 1, <5 x i1> undef, <5 x i32> undef)
- %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
- %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
- %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
- %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-
- %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
- %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
- %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
- %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-
- %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
- %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
- %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
- %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+ %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+ %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+ %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+ %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+ %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+
+ %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+ %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+ %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+ %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+ %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+ %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+ %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+ %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+ %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+ %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+ %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+ %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+ %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+
+ %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+ %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+ %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+ %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+ %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+ %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+
+ %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+ %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+ %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+ %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+ %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+ %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+ %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+ %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+ %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+ %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+ %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+ %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+ %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+
+ %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+
+ %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
ret i32 0
}
define i32 @masked_store() {
; SSE2-LABEL: 'masked_store'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v6f64.p0v6f64(<6 x double> undef, <6 x double>* undef, i32 1, <6 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v5f64.p0v5f64(<5 x double> undef, <5 x double>* undef, i32 1, <5 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3f64.p0v3f64(<3 x double> undef, <3 x double>* undef, i32 1, <3 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v15f32.p0v15f32(<15 x float> undef, <15 x float>* undef, i32 1, <15 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v14f32.p0v14f32(<14 x float> undef, <14 x float>* undef, i32 1, <14 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v13f32.p0v13f32(<13 x float> undef, <13 x float>* undef, i32 1, <13 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v12f32.p0v12f32(<12 x float> undef, <12 x float>* undef, i32 1, <12 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v11f32.p0v11f32(<11 x float> undef, <11 x float>* undef, i32 1, <11 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v10f32.p0v10f32(<10 x float> undef, <10 x float>* undef, i32 1, <10 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v9f32.p0v9f32(<9 x float> undef, <9 x float>* undef, i32 1, <9 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> undef, <7 x float>* undef, i32 1, <7 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f32.p0v6f32(<6 x float> undef, <6 x float>* undef, i32 1, <6 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v5f32.p0v5f32(<5 x float> undef, <5 x float>* undef, i32 1, <5 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f32.p0v3f32(<3 x float> undef, <3 x float>* undef, i32 1, <3 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0v1f32(<1 x float> undef, <1 x float>* undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7i64.p0v7i64(<7 x i64> undef, <7 x i64>* undef, i32 1, <7 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6i64.p0v6i64(<6 x i64> undef, <6 x i64>* undef, i32 1, <6 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5i64.p0v5i64(<5 x i64> undef, <5 x i64>* undef, i32 1, <5 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0v3i64(<3 x i64> undef, <3 x i64>* undef, i32 1, <3 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: call void @llvm.masked.store.v15i32.p0v15i32(<15 x i32> undef, <15 x i32>* undef, i32 1, <15 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14i32.p0v14i32(<14 x i32> undef, <14 x i32>* undef, i32 1, <14 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13i32.p0v13i32(<13 x i32> undef, <13 x i32>* undef, i32 1, <13 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12i32.p0v12i32(<12 x i32> undef, <12 x i32>* undef, i32 1, <12 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.store.v11i32.p0v11i32(<11 x i32> undef, <11 x i32>* undef, i32 1, <11 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10i32.p0v10i32(<10 x i32> undef, <10 x i32>* undef, i32 1, <10 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9i32.p0v9i32(<9 x i32> undef, <9 x i32>* undef, i32 1, <9 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7i32.p0v7i32(<7 x i32> undef, <7 x i32>* undef, i32 1, <7 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6i32.p0v6i32(<6 x i32> undef, <6 x i32>* undef, i32 1, <6 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5i32.p0v5i32(<5 x i32> undef, <5 x i32>* undef, i32 1, <5 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SSE42-LABEL: 'masked_store'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v6f64.p0v6f64(<6 x double> undef, <6 x double>* undef, i32 1, <6 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0v5f64(<5 x double> undef, <5 x double>* undef, i32 1, <5 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v3f64.p0v3f64(<3 x double> undef, <3 x double>* undef, i32 1, <3 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v15f32.p0v15f32(<15 x float> undef, <15 x float>* undef, i32 1, <15 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v14f32.p0v14f32(<14 x float> undef, <14 x float>* undef, i32 1, <14 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v13f32.p0v13f32(<13 x float> undef, <13 x float>* undef, i32 1, <13 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v12f32.p0v12f32(<12 x float> undef, <12 x float>* undef, i32 1, <12 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v11f32.p0v11f32(<11 x float> undef, <11 x float>* undef, i32 1, <11 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v10f32.p0v10f32(<10 x float> undef, <10 x float>* undef, i32 1, <10 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v9f32.p0v9f32(<9 x float> undef, <9 x float>* undef, i32 1, <9 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> undef, <7 x float>* undef, i32 1, <7 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v6f32.p0v6f32(<6 x float> undef, <6 x float>* undef, i32 1, <6 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v5f32.p0v5f32(<5 x float> undef, <5 x float>* undef, i32 1, <5 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.store.v3f32.p0v3f32(<3 x float> undef, <3 x float>* undef, i32 1, <3 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0v1f32(<1 x float> undef, <1 x float>* undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i64.p0v7i64(<7 x i64> undef, <7 x i64>* undef, i32 1, <7 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i64.p0v6i64(<6 x i64> undef, <6 x i64>* undef, i32 1, <6 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i64.p0v5i64(<5 x i64> undef, <5 x i64>* undef, i32 1, <5 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i64.p0v3i64(<3 x i64> undef, <3 x i64>* undef, i32 1, <3 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v15i32.p0v15i32(<15 x i32> undef, <15 x i32>* undef, i32 1, <15 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v14i32.p0v14i32(<14 x i32> undef, <14 x i32>* undef, i32 1, <14 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v13i32.p0v13i32(<13 x i32> undef, <13 x i32>* undef, i32 1, <13 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v12i32.p0v12i32(<12 x i32> undef, <12 x i32>* undef, i32 1, <12 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v11i32.p0v11i32(<11 x i32> undef, <11 x i32>* undef, i32 1, <11 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v10i32.p0v10i32(<10 x i32> undef, <10 x i32>* undef, i32 1, <10 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.store.v9i32.p0v9i32(<9 x i32> undef, <9 x i32>* undef, i32 1, <9 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i32.p0v7i32(<7 x i32> undef, <7 x i32>* undef, i32 1, <7 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i32.p0v6i32(<6 x i32> undef, <6 x i32>* undef, i32 1, <6 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i32.p0v5i32(<5 x i32> undef, <5 x i32>* undef, i32 1, <5 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX-LABEL: 'masked_store'
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0v6f64(<6 x double> undef, <6 x double>* undef, i32 1, <6 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0v5f64(<5 x double> undef, <5 x double>* undef, i32 1, <5 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0v3f64(<3 x double> undef, <3 x double>* undef, i32 1, <3 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0v15f32(<15 x float> undef, <15 x float>* undef, i32 1, <15 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0v14f32(<14 x float> undef, <14 x float>* undef, i32 1, <14 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13f32.p0v13f32(<13 x float> undef, <13 x float>* undef, i32 1, <13 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0v12f32(<12 x float> undef, <12 x float>* undef, i32 1, <12 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0v11f32(<11 x float> undef, <11 x float>* undef, i32 1, <11 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0v10f32(<10 x float> undef, <10 x float>* undef, i32 1, <10 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0v9f32(<9 x float> undef, <9 x float>* undef, i32 1, <9 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> undef, <7 x float>* undef, i32 1, <7 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0v6f32(<6 x float> undef, <6 x float>* undef, i32 1, <6 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0v5f32(<5 x float> undef, <5 x float>* undef, i32 1, <5 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0v3f32(<3 x float> undef, <3 x float>* undef, i32 1, <3 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0v1f32(<1 x float> undef, <1 x float>* undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7i64.p0v7i64(<7 x i64> undef, <7 x i64>* undef, i32 1, <7 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0v6i64(<6 x i64> undef, <6 x i64>* undef, i32 1, <6 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0v5i64(<5 x i64> undef, <5 x i64>* undef, i32 1, <5 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0v3i64(<3 x i64> undef, <3 x i64>* undef, i32 1, <3 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15i32.p0v15i32(<15 x i32> undef, <15 x i32>* undef, i32 1, <15 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0v14i32(<14 x i32> undef, <14 x i32>* undef, i32 1, <14 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0v13i32(<13 x i32> undef, <13 x i32>* undef, i32 1, <13 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0v12i32(<12 x i32> undef, <12 x i32>* undef, i32 1, <12 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0v11i32(<11 x i32> undef, <11 x i32>* undef, i32 1, <11 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0v10i32(<10 x i32> undef, <10 x i32>* undef, i32 1, <10 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0v9i32(<9 x i32> undef, <9 x i32>* undef, i32 1, <9 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0v7i32(<7 x i32> undef, <7 x i32>* undef, i32 1, <7 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0v6i32(<6 x i32> undef, <6 x i32>* undef, i32 1, <6 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0v5i32(<5 x i32> undef, <5 x i32>* undef, i32 1, <5 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; KNL-LABEL: 'masked_store'
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0v6f64(<6 x double> undef, <6 x double>* undef, i32 1, <6 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0v5f64(<5 x double> undef, <5 x double>* undef, i32 1, <5 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0v3f64(<3 x double> undef, <3 x double>* undef, i32 1, <3 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0v15f32(<15 x float> undef, <15 x float>* undef, i32 1, <15 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0v14f32(<14 x float> undef, <14 x float>* undef, i32 1, <14 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0v13f32(<13 x float> undef, <13 x float>* undef, i32 1, <13 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0v12f32(<12 x float> undef, <12 x float>* undef, i32 1, <12 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0v11f32(<11 x float> undef, <11 x float>* undef, i32 1, <11 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0v10f32(<10 x float> undef, <10 x float>* undef, i32 1, <10 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0v9f32(<9 x float> undef, <9 x float>* undef, i32 1, <9 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> undef, <7 x float>* undef, i32 1, <7 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0v6f32(<6 x float> undef, <6 x float>* undef, i32 1, <6 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0v5f32(<5 x float> undef, <5 x float>* undef, i32 1, <5 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0v3f32(<3 x float> undef, <3 x float>* undef, i32 1, <3 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0v1f32(<1 x float> undef, <1 x float>* undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0v7i64(<7 x i64> undef, <7 x i64>* undef, i32 1, <7 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0v6i64(<6 x i64> undef, <6 x i64>* undef, i32 1, <6 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0v5i64(<5 x i64> undef, <5 x i64>* undef, i32 1, <5 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0v3i64(<3 x i64> undef, <3 x i64>* undef, i32 1, <3 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0v15i32(<15 x i32> undef, <15 x i32>* undef, i32 1, <15 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0v14i32(<14 x i32> undef, <14 x i32>* undef, i32 1, <14 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0v13i32(<13 x i32> undef, <13 x i32>* undef, i32 1, <13 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0v12i32(<12 x i32> undef, <12 x i32>* undef, i32 1, <12 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0v11i32(<11 x i32> undef, <11 x i32>* undef, i32 1, <11 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0v10i32(<10 x i32> undef, <10 x i32>* undef, i32 1, <10 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0v9i32(<9 x i32> undef, <9 x i32>* undef, i32 1, <9 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0v7i32(<7 x i32> undef, <7 x i32>* undef, i32 1, <7 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0v6i32(<6 x i32> undef, <6 x i32>* undef, i32 1, <6 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0v5i32(<5 x i32> undef, <5 x i32>* undef, i32 1, <5 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKX-LABEL: 'masked_store'
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0v6f64(<6 x double> undef, <6 x double>* undef, i32 1, <6 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0v5f64(<5 x double> undef, <5 x double>* undef, i32 1, <5 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0v3f64(<3 x double> undef, <3 x double>* undef, i32 1, <3 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0v15f32(<15 x float> undef, <15 x float>* undef, i32 1, <15 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0v14f32(<14 x float> undef, <14 x float>* undef, i32 1, <14 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0v13f32(<13 x float> undef, <13 x float>* undef, i32 1, <13 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0v12f32(<12 x float> undef, <12 x float>* undef, i32 1, <12 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0v11f32(<11 x float> undef, <11 x float>* undef, i32 1, <11 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0v10f32(<10 x float> undef, <10 x float>* undef, i32 1, <10 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0v9f32(<9 x float> undef, <9 x float>* undef, i32 1, <9 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> undef, <7 x float>* undef, i32 1, <7 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0v6f32(<6 x float> undef, <6 x float>* undef, i32 1, <6 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0v5f32(<5 x float> undef, <5 x float>* undef, i32 1, <5 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0v3f32(<3 x float> undef, <3 x float>* undef, i32 1, <3 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0v1f32(<1 x float> undef, <1 x float>* undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0v7i64(<7 x i64> undef, <7 x i64>* undef, i32 1, <7 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0v6i64(<6 x i64> undef, <6 x i64>* undef, i32 1, <6 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0v5i64(<5 x i64> undef, <5 x i64>* undef, i32 1, <5 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0v3i64(<3 x i64> undef, <3 x i64>* undef, i32 1, <3 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0v15i32(<15 x i32> undef, <15 x i32>* undef, i32 1, <15 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0v14i32(<14 x i32> undef, <14 x i32>* undef, i32 1, <14 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0v13i32(<13 x i32> undef, <13 x i32>* undef, i32 1, <13 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0v12i32(<12 x i32> undef, <12 x i32>* undef, i32 1, <12 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0v11i32(<11 x i32> undef, <11 x i32>* undef, i32 1, <11 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0v10i32(<10 x i32> undef, <10 x i32>* undef, i32 1, <10 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0v9i32(<9 x i32> undef, <9 x i32>* undef, i32 1, <9 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0v7i32(<7 x i32> undef, <7 x i32>* undef, i32 1, <7 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0v6i32(<6 x i32> undef, <6 x i32>* undef, i32 1, <6 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0v5i32(<5 x i32> undef, <5 x i32>* undef, i32 1, <5 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef)
- call void @llvm.masked.store.v6f64.p0v6f64(<6 x double> undef, <6 x double>* undef, i32 1, <6 x i1> undef)
- call void @llvm.masked.store.v5f64.p0v5f64(<5 x double> undef, <5 x double>* undef, i32 1, <5 x i1> undef)
- call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v3f64.p0v3f64(<3 x double> undef, <3 x double>* undef, i32 1, <3 x i1> undef)
- call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v15f32.p0v15f32(<15 x float> undef, <15 x float>* undef, i32 1, <15 x i1> undef)
- call void @llvm.masked.store.v14f32.p0v14f32(<14 x float> undef, <14 x float>* undef, i32 1, <14 x i1> undef)
- call void @llvm.masked.store.v13f32.p0v13f32(<13 x float> undef, <13 x float>* undef, i32 1, <13 x i1> undef)
- call void @llvm.masked.store.v12f32.p0v12f32(<12 x float> undef, <12 x float>* undef, i32 1, <12 x i1> undef)
- call void @llvm.masked.store.v11f32.p0v11f32(<11 x float> undef, <11 x float>* undef, i32 1, <11 x i1> undef)
- call void @llvm.masked.store.v10f32.p0v10f32(<10 x float> undef, <10 x float>* undef, i32 1, <10 x i1> undef)
- call void @llvm.masked.store.v9f32.p0v9f32(<9 x float> undef, <9 x float>* undef, i32 1, <9 x i1> undef)
- call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> undef, <7 x float>* undef, i32 1, <7 x i1> undef)
- call void @llvm.masked.store.v6f32.p0v6f32(<6 x float> undef, <6 x float>* undef, i32 1, <6 x i1> undef)
- call void @llvm.masked.store.v5f32.p0v5f32(<5 x float> undef, <5 x float>* undef, i32 1, <5 x i1> undef)
- call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v3f32.p0v3f32(<3 x float> undef, <3 x float>* undef, i32 1, <3 x i1> undef)
- call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v1f32.p0v1f32(<1 x float> undef, <1 x float>* undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v7i64.p0v7i64(<7 x i64> undef, <7 x i64>* undef, i32 1, <7 x i1> undef)
- call void @llvm.masked.store.v6i64.p0v6i64(<6 x i64> undef, <6 x i64>* undef, i32 1, <6 x i1> undef)
- call void @llvm.masked.store.v5i64.p0v5i64(<5 x i64> undef, <5 x i64>* undef, i32 1, <5 x i1> undef)
- call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v3i64.p0v3i64(<3 x i64> undef, <3 x i64>* undef, i32 1, <3 x i1> undef)
- call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v15i32.p0v15i32(<15 x i32> undef, <15 x i32>* undef, i32 1, <15 x i1> undef)
- call void @llvm.masked.store.v14i32.p0v14i32(<14 x i32> undef, <14 x i32>* undef, i32 1, <14 x i1> undef)
- call void @llvm.masked.store.v13i32.p0v13i32(<13 x i32> undef, <13 x i32>* undef, i32 1, <13 x i1> undef)
- call void @llvm.masked.store.v12i32.p0v12i32(<12 x i32> undef, <12 x i32>* undef, i32 1, <12 x i1> undef)
- call void @llvm.masked.store.v11i32.p0v11i32(<11 x i32> undef, <11 x i32>* undef, i32 1, <11 x i1> undef)
- call void @llvm.masked.store.v10i32.p0v10i32(<10 x i32> undef, <10 x i32>* undef, i32 1, <10 x i1> undef)
- call void @llvm.masked.store.v9i32.p0v9i32(<9 x i32> undef, <9 x i32>* undef, i32 1, <9 x i1> undef)
- call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v7i32.p0v7i32(<7 x i32> undef, <7 x i32>* undef, i32 1, <7 x i1> undef)
- call void @llvm.masked.store.v6i32.p0v6i32(<6 x i32> undef, <6 x i32>* undef, i32 1, <6 x i1> undef)
- call void @llvm.masked.store.v5i32.p0v5i32(<5 x i32> undef, <5 x i32>* undef, i32 1, <5 x i1> undef)
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef)
- call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
-
- call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
- call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+ call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+ call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+ call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
+ call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
+
+ call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
+ call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+ call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+ call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+ call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+ call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+ call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+ call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
+ call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
+ call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
+ call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
+ call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
+
+ call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
+ call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
+ call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
+ call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
+ call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
+
+ call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
+ call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
+ call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
+ call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
+ call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
+ call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
+ call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
+ call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
+ call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
+ call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
+ call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
+
+ call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
+ call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
+
+ call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
+ call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
+ call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
ret i32 0
}
define i32 @masked_gather() {
; SSE2-LABEL: 'masked_gather'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SSE42-LABEL: 'masked_gather'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX1-LABEL: 'masked_gather'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX2-LABEL: 'masked_gather'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKL-LABEL: 'masked_gather'
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; KNL-LABEL: 'masked_gather'
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKX-LABEL: 'masked_gather'
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
- %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
- %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
- %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+ %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
- %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
- %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
- %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
- %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+ %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
- %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
- %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
- %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
- %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+ %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+ %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
- %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
- %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
- %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
- %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+ %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
- %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
- %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
- %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
- %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+ %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
- %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
- %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
- %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
- %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+ %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
ret i32 0
}
define i32 @masked_scatter() {
; SSE2-LABEL: 'masked_scatter'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 316 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 316 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SSE42-LABEL: 'masked_scatter'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX-LABEL: 'masked_scatter'
-; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 210 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 105 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 210 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 105 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; KNL-LABEL: 'masked_scatter'
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKX-LABEL: 'masked_scatter'
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
+ call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
- call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
+ call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
- call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+ call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
- call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+ call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+ call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
ret i32 0
}
define i32 @masked_expandload() {
; SSE2-LABEL: 'masked_expandload'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SSE42-LABEL: 'masked_expandload'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX-LABEL: 'masked_expandload'
-; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX512-LABEL: 'masked_expandload'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
- %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
- %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
- %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
+ %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef)
- %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
- %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
- %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
- %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
+ %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef)
- %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
- %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
- %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
- %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
+ %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef)
+ %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef)
- %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
- %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
- %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
- %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
+ %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef)
- %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
- %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
- %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
- %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
+ %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef)
- %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
- %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
- %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
- %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
+ %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef)
ret i32 0
}
define i32 @masked_compressstore() {
; SSE2-LABEL: 'masked_compressstore'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SSE42-LABEL: 'masked_compressstore'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX1-LABEL: 'masked_compressstore'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX2-LABEL: 'masked_compressstore'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKL-LABEL: 'masked_compressstore'
-; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX512-LABEL: 'masked_compressstore'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
- call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
- call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
- call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
+ call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+ call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+ call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+ call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
- call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
- call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
- call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
- call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
+ call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+ call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+ call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+ call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
- call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
- call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
- call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
- call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
+ call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+ call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+ call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+ call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
- call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
- call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
- call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
- call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
+ call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+ call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+ call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+ call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
- call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
- call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
- call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
- call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
+ call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+ call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+ call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+ call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
- call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
- call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
- call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
- call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+ call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+ call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+ call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+ call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
ret i32 0
}
-define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
+define <2 x double> @test1(<2 x i64> %trigger, ptr %addr, <2 x double> %dst) {
; SSE2-LABEL: 'test1'
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; SSE42-LABEL: 'test1'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; AVX-LABEL: 'test1'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; AVX512-LABEL: 'test1'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
%mask = icmp eq <2 x i64> %trigger, zeroinitializer
- %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
ret <2 x double> %res
}
-define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
+define <4 x i32> @test2(<4 x i32> %trigger, ptr %addr, <4 x i32> %dst) {
; SSE2-LABEL: 'test2'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SSE42-LABEL: 'test2'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; AVX-LABEL: 'test2'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; AVX512-LABEL: 'test2'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
- %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
+ %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
ret <4 x i32> %res
}
-define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
+define void @test3(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) {
; SSE2-LABEL: 'test3'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test3'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX-LABEL: 'test3'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test3'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32>%val, ptr %addr, i32 4, <4 x i1>%mask)
ret void
}
-define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
+define <8 x float> @test4(<8 x i32> %trigger, ptr %addr, <8 x float> %dst) {
; SSE2-LABEL: 'test4'
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;
; SSE42-LABEL: 'test4'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;
; AVX1-LABEL: 'test4'
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;
; AVX2-LABEL: 'test4'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;
; SKL-LABEL: 'test4'
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;
; AVX512-LABEL: 'test4'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;
%mask = icmp eq <8 x i32> %trigger, zeroinitializer
- %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
+ %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
ret <8 x float> %res
}
-define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
+define void @test5(<2 x i32> %trigger, ptr %addr, <2 x float> %val) {
; SSE2-LABEL: 'test5'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test5'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX-LABEL: 'test5'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test5'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
- call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
+ call void @llvm.masked.store.v2f32.p0(<2 x float>%val, ptr %addr, i32 4, <2 x i1>%mask)
ret void
}
-define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
+define void @test6(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) {
; SSE2-LABEL: 'test6'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test6'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX-LABEL: 'test6'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test6'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
- call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
+ call void @llvm.masked.store.v2i32.p0(<2 x i32>%val, ptr %addr, i32 4, <2 x i1>%mask)
ret void
}
-define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
+define <2 x float> @test7(<2 x i32> %trigger, ptr %addr, <2 x float> %dst) {
; SSE2-LABEL: 'test7'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
;
; SSE42-LABEL: 'test7'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
;
; AVX-LABEL: 'test7'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
;
; AVX512-LABEL: 'test7'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
;
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
- %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
+ %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
ret <2 x float> %res
}
-define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
+define <2 x i32> @test8(<2 x i32> %trigger, ptr %addr, <2 x i32> %dst) {
; SSE2-LABEL: 'test8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
;
; SSE42-LABEL: 'test8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
;
; AVX-LABEL: 'test8'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
;
; AVX512-LABEL: 'test8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
;
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
- %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
+ %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
ret <2 x i32> %res
}
-define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0) {
+define <2 x double> @test_gather_2f64(<2 x ptr> %ptrs, <2 x i1> %mask, <2 x double> %src0) {
; SSE2-LABEL: 'test_gather_2f64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; SSE42-LABEL: 'test_gather_2f64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; AVX1-LABEL: 'test_gather_2f64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; AVX2-LABEL: 'test_gather_2f64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; SKL-LABEL: 'test_gather_2f64'
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; AVX512-LABEL: 'test_gather_2f64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
- %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+ %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
ret <2 x double> %res
}
-define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) {
+define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %src0) {
; SSE2-LABEL: 'test_gather_4i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SSE42-LABEL: 'test_gather_4i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; AVX1-LABEL: 'test_gather_4i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; AVX2-LABEL: 'test_gather_4i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SKL-LABEL: 'test_gather_4i32'
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; KNL-LABEL: 'test_gather_4i32'
-; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SKX-LABEL: 'test_gather_4i32'
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
- %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
ret <4 x i32> %res
}
-define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) {
+define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) {
; SSE2-LABEL: 'test_gather_4i32_const_mask'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SSE42-LABEL: 'test_gather_4i32_const_mask'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; AVX1-LABEL: 'test_gather_4i32_const_mask'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; AVX2-LABEL: 'test_gather_4i32_const_mask'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SKL-LABEL: 'test_gather_4i32_const_mask'
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; KNL-LABEL: 'test_gather_4i32_const_mask'
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SKX-LABEL: 'test_gather_4i32_const_mask'
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
- %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
ret <4 x i32> %res
}
-define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) {
+define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) {
; SSE2-LABEL: 'test_gather_16f32_const_mask'
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SSE42-LABEL: 'test_gather_16f32_const_mask'
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX1-LABEL: 'test_gather_16f32_const_mask'
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX2-LABEL: 'test_gather_16f32_const_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKL-LABEL: 'test_gather_16f32_const_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX512-LABEL: 'test_gather_16f32_const_mask'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
%sext_ind = sext <16 x i32> %ind to <16 x i64>
- %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
+ %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
- %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
ret <16 x float>%res
}
-define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) {
+define <16 x float> @test_gather_16f32_var_mask(ptr %base, <16 x i32> %ind, <16 x i1>%mask) {
; SSE2-LABEL: 'test_gather_16f32_var_mask'
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SSE42-LABEL: 'test_gather_16f32_var_mask'
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX1-LABEL: 'test_gather_16f32_var_mask'
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX2-LABEL: 'test_gather_16f32_var_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKL-LABEL: 'test_gather_16f32_var_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX512-LABEL: 'test_gather_16f32_var_mask'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
%sext_ind = sext <16 x i32> %ind to <16 x i64>
- %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
+ %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
- %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
ret <16 x float>%res
}
-define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {
+define <16 x float> @test_gather_16f32_ra_var_mask(<16 x ptr> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {
; SSE2-LABEL: 'test_gather_16f32_ra_var_mask'
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SSE42-LABEL: 'test_gather_16f32_ra_var_mask'
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX1-LABEL: 'test_gather_16f32_ra_var_mask'
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX2-LABEL: 'test_gather_16f32_ra_var_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKL-LABEL: 'test_gather_16f32_ra_var_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX512-LABEL: 'test_gather_16f32_ra_var_mask'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
%sext_ind = sext <16 x i32> %ind to <16 x i64>
- %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
+ %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
- %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
ret <16 x float>%res
}
-define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) {
+define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
; SSE2-LABEL: 'test_gather_16f32_const_mask2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
-; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SSE42-LABEL: 'test_gather_16f32_const_mask2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX1-LABEL: 'test_gather_16f32_const_mask2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX2-LABEL: 'test_gather_16f32_const_mask2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKL-LABEL: 'test_gather_16f32_const_mask2'
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX512-LABEL: 'test_gather_16f32_const_mask2'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
-; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
- %broadcast.splatinsert = insertelement <16 x float*> poison, float* %base, i32 0
- %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer
+ %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+ %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
%sext_ind = sext <16 x i32> %ind to <16 x i64>
- %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
+ %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
- %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
ret <16 x float>%res
}
-define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
+define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
; SSE2-LABEL: 'test_scatter_16i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test_scatter_16i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'test_scatter_16i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'test_scatter_16i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKL-LABEL: 'test_scatter_16i32'
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test_scatter_16i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %broadcast.splatinsert = insertelement <16 x i32*> poison, i32* %base, i32 0
- %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer
+ %broadcast.splatinsert = insertelement <16 x ptr> poison, ptr %base, i32 0
+ %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> poison, <16 x i32> zeroinitializer
- %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+ %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
%imask = bitcast i16 %mask to <16 x i1>
- call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+ call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>%val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
ret void
}
-define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
+define void @test_scatter_8i32(<8 x i32>%a1, <8 x ptr> %ptr, <8 x i1>%mask) {
; SSE2-LABEL: 'test_scatter_8i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test_scatter_8i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX-LABEL: 'test_scatter_8i32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test_scatter_8i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask)
ret void
}
-define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
+define void @test_scatter_4i32(<4 x i32>%a1, <4 x ptr> %ptr, <4 x i1>%mask) {
; SSE2-LABEL: 'test_scatter_4i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test_scatter_4i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX-LABEL: 'test_scatter_4i32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; KNL-LABEL: 'test_scatter_4i32'
-; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKX-LABEL: 'test_scatter_4i32'
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask)
ret void
}
-define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) {
+define <4 x float> @test_gather_4f32(ptr %ptr, <4 x i32> %ind, <4 x i1>%mask) {
; SSE2-LABEL: 'test_gather_4f32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SSE42-LABEL: 'test_gather_4f32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; AVX1-LABEL: 'test_gather_4f32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; AVX2-LABEL: 'test_gather_4f32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SKL-LABEL: 'test_gather_4f32'
; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; KNL-LABEL: 'test_gather_4f32'
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SKX-LABEL: 'test_gather_4f32'
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
%sext_ind = sext <4 x i32> %ind to <4 x i64>
- %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
+ %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
- %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+ %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
ret <4 x float>%res
}
-define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) {
+define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) {
; SSE2-LABEL: 'test_gather_4f32_const_mask'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SSE42-LABEL: 'test_gather_4f32_const_mask'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; AVX1-LABEL: 'test_gather_4f32_const_mask'
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; AVX2-LABEL: 'test_gather_4f32_const_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SKL-LABEL: 'test_gather_4f32_const_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; KNL-LABEL: 'test_gather_4f32_const_mask'
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SKX-LABEL: 'test_gather_4f32_const_mask'
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
%sext_ind = sext <4 x i32> %ind to <4 x i64>
- %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
+ %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
- %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+ %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
ret <4 x float>%res
}
-declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
-declare <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>*, i32, <7 x i1>, <7 x double>)
-declare <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>*, i32, <6 x i1>, <6 x double>)
-declare <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>*, i32, <5 x i1>, <5 x double>)
-declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
-declare <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>*, i32, <3 x i1>, <3 x double>)
-declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
-declare <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>*, i32, <1 x i1>, <1 x double>)
-
-declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
-declare <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>*, i32, <15 x i1>, <15 x float>)
-declare <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>*, i32, <14 x i1>, <14 x float>)
-declare <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>*, i32, <13 x i1>, <13 x float>)
-declare <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>*, i32, <12 x i1>, <12 x float>)
-declare <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>*, i32, <11 x i1>, <11 x float>)
-declare <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>*, i32, <10 x i1>, <10 x float>)
-declare <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>*, i32, <9 x i1>, <9 x float>)
-declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
-declare <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>*, i32, <7 x i1>, <7 x float>)
-declare <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>*, i32, <6 x i1>, <6 x float>)
-declare <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>*, i32, <5 x i1>, <5 x float>)
-declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
-declare <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>*, i32, <3 x i1>, <3 x float>)
-declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
-declare <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>*, i32, <1 x i1>, <1 x float>)
-
-declare <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>*, i32, <8 x i1>, <8 x i64>)
-declare <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>*, i32, <7 x i1>, <7 x i64>)
-declare <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>*, i32, <6 x i1>, <6 x i64>)
-declare <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>*, i32, <5 x i1>, <5 x i64>)
-declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>)
-declare <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>*, i32, <3 x i1>, <3 x i64>)
-declare <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>*, i32, <2 x i1>, <2 x i64>)
-declare <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>*, i32, <1 x i1>, <1 x i64>)
-
-declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
-declare <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>*, i32, <15 x i1>, <15 x i32>)
-declare <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>*, i32, <14 x i1>, <14 x i32>)
-declare <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>*, i32, <13 x i1>, <13 x i32>)
-declare <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>*, i32, <12 x i1>, <12 x i32>)
-declare <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>*, i32, <11 x i1>, <11 x i32>)
-declare <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>*, i32, <10 x i1>, <10 x i32>)
-declare <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>*, i32, <9 x i1>, <9 x i32>)
-declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
-declare <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>*, i32, <7 x i1>, <7 x i32>)
-declare <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>*, i32, <6 x i1>, <6 x i32>)
-declare <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>*, i32, <5 x i1>, <5 x i32>)
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
-declare <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>*, i32, <3 x i1>, <3 x i32>)
-declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
-declare <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>*, i32, <1 x i1>, <1 x i32>)
-
-declare <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>*, i32, <32 x i1>, <32 x i16>)
-declare <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)
-declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
-declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>)
-
-declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)
-declare <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)
-declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
-declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
-
-declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v7f64.p0v7f64(<7 x double>, <7 x double>*, i32, <7 x i1>)
-declare void @llvm.masked.store.v6f64.p0v6f64(<6 x double>, <6 x double>*, i32, <6 x i1>)
-declare void @llvm.masked.store.v5f64.p0v5f64(<5 x double>, <5 x double>*, i32, <5 x i1>)
-declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)
-declare void @llvm.masked.store.v3f64.p0v3f64(<3 x double>, <3 x double>*, i32, <3 x i1>)
-declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
-declare void @llvm.masked.store.v1f64.p0v1f64(<1 x double>, <1 x double>*, i32, <1 x i1>)
-
-declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v15f32.p0v15f32(<15 x float>, <15 x float>*, i32, <15 x i1>)
-declare void @llvm.masked.store.v14f32.p0v14f32(<14 x float>, <14 x float>*, i32, <14 x i1>)
-declare void @llvm.masked.store.v13f32.p0v13f32(<13 x float>, <13 x float>*, i32, <13 x i1>)
-declare void @llvm.masked.store.v12f32.p0v12f32(<12 x float>, <12 x float>*, i32, <12 x i1>)
-declare void @llvm.masked.store.v11f32.p0v11f32(<11 x float>, <11 x float>*, i32, <11 x i1>)
-declare void @llvm.masked.store.v10f32.p0v10f32(<10 x float>, <10 x float>*, i32, <10 x i1>)
-declare void @llvm.masked.store.v9f32.p0v9f32(<9 x float>, <9 x float>*, i32, <9 x i1>)
-declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v7f32.p0v7f32(<7 x float>, <7 x float>*, i32, <7 x i1>)
-declare void @llvm.masked.store.v6f32.p0v6f32(<6 x float>, <6 x float>*, i32, <6 x i1>)
-declare void @llvm.masked.store.v5f32.p0v5f32(<5 x float>, <5 x float>*, i32, <5 x i1>)
-declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
-declare void @llvm.masked.store.v3f32.p0v3f32(<3 x float>, <3 x float>*, i32, <3 x i1>)
-declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
-declare void @llvm.masked.store.v1f32.p0v1f32(<1 x float>, <1 x float>*, i32, <1 x i1>)
-
-declare void @llvm.masked.store.v8i64.p0v8i64(<8 x i64>, <8 x i64>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v7i64.p0v7i64(<7 x i64>, <7 x i64>*, i32, <7 x i1>)
-declare void @llvm.masked.store.v6i64.p0v6i64(<6 x i64>, <6 x i64>*, i32, <6 x i1>)
-declare void @llvm.masked.store.v5i64.p0v5i64(<5 x i64>, <5 x i64>*, i32, <5 x i1>)
-declare void @llvm.masked.store.v4i64.p0v4i64(<4 x i64>, <4 x i64>*, i32, <4 x i1>)
-declare void @llvm.masked.store.v3i64.p0v3i64(<3 x i64>, <3 x i64>*, i32, <3 x i1>)
-declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
-declare void @llvm.masked.store.v1i64.p0v1i64(<1 x i64>, <1 x i64>*, i32, <1 x i1>)
-
-declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v15i32.p0v15i32(<15 x i32>, <15 x i32>*, i32, <15 x i1>)
-declare void @llvm.masked.store.v14i32.p0v14i32(<14 x i32>, <14 x i32>*, i32, <14 x i1>)
-declare void @llvm.masked.store.v13i32.p0v13i32(<13 x i32>, <13 x i32>*, i32, <13 x i1>)
-declare void @llvm.masked.store.v12i32.p0v12i32(<12 x i32>, <12 x i32>*, i32, <12 x i1>)
-declare void @llvm.masked.store.v11i32.p0v11i32(<11 x i32>, <11 x i32>*, i32, <11 x i1>)
-declare void @llvm.masked.store.v10i32.p0v10i32(<10 x i32>, <10 x i32>*, i32, <10 x i1>)
-declare void @llvm.masked.store.v9i32.p0v9i32(<9 x i32>, <9 x i32>*, i32, <9 x i1>)
-declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v7i32.p0v7i32(<7 x i32>, <7 x i32>*, i32, <7 x i1>)
-declare void @llvm.masked.store.v6i32.p0v6i32(<6 x i32>, <6 x i32>*, i32, <6 x i1>)
-declare void @llvm.masked.store.v5i32.p0v5i32(<5 x i32>, <5 x i32>*, i32, <5 x i1>)
-declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
-declare void @llvm.masked.store.v3i32.p0v3i32(<3 x i32>, <3 x i32>*, i32, <3 x i1>)
-declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
-declare void @llvm.masked.store.v1i32.p0v1i32(<1 x i32>, <1 x i32>*, i32, <1 x i1>)
-
-declare void @llvm.masked.store.v32i16.p0v32i16(<32 x i16>, <32 x i16>*, i32, <32 x i1>)
-declare void @llvm.masked.store.v16i16.p0v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>)
-
-declare void @llvm.masked.store.v64i8.p0v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>)
-declare void @llvm.masked.store.v32i8.p0v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
-declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
-
-declare <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*>, i32, <8 x i1>, <8 x double>)
-declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
-declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
-declare <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*>, i32, <1 x i1>, <1 x double>)
-
-declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
-declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>)
-declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
-declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>)
-
-declare <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>)
-declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
-declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
-declare <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*>, i32, <1 x i1>, <1 x i64>)
-
-declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
-declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)
-declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
-declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
-
-declare <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*>, i32, <32 x i1>, <32 x i16>)
-declare <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*>, i32, <16 x i1>, <16 x i16>)
-declare <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*>, i32, <8 x i1>, <8 x i16>)
-declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>)
-
-declare <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*>, i32, <64 x i1>, <64 x i8>)
-declare <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*>, i32, <32 x i1>, <32 x i8>)
-declare <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>)
-declare <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*>, i32, <8 x i1>, <8 x i8>)
-
-declare void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double>, <8 x double*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double>, <1 x double*>, i32, <1 x i1>)
-
-declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float>, <2 x float*>, i32, <2 x i1>)
-
-declare void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64>, <8 x i64*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64>, <4 x i64*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64>, <2 x i64*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64>, <1 x i64*>, i32, <1 x i1>)
-
-declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>, <16 x i32*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32>, <2 x i32*>, i32, <2 x i1>)
-
-declare void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16>, <32 x i16*>, i32, <32 x i1>)
-declare void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16>, <16 x i16*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16>, <8 x i16*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32, <4 x i1>)
-
-declare void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8>, <64 x i8*>, i32, <64 x i1>)
-declare void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8>, <32 x i8*>, i32, <32 x i1>)
-declare void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8>, <16 x i8*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8>, <8 x i8*>, i32, <8 x i1>)
-
-declare <8 x double> @llvm.masked.expandload.v8f64(double*, <8 x i1>, <8 x double>)
-declare <4 x double> @llvm.masked.expandload.v4f64(double*, <4 x i1>, <4 x double>)
-declare <2 x double> @llvm.masked.expandload.v2f64(double*, <2 x i1>, <2 x double>)
-declare <1 x double> @llvm.masked.expandload.v1f64(double*, <1 x i1>, <1 x double>)
-
-declare <16 x float> @llvm.masked.expandload.v16f32(float*, <16 x i1>, <16 x float>)
-declare <8 x float> @llvm.masked.expandload.v8f32(float*, <8 x i1>, <8 x float>)
-declare <4 x float> @llvm.masked.expandload.v4f32(float*, <4 x i1>, <4 x float>)
-declare <2 x float> @llvm.masked.expandload.v2f32(float*, <2 x i1>, <2 x float>)
-
-declare <8 x i64> @llvm.masked.expandload.v8i64(i64*, <8 x i1>, <8 x i64>)
-declare <4 x i64> @llvm.masked.expandload.v4i64(i64*, <4 x i1>, <4 x i64>)
-declare <2 x i64> @llvm.masked.expandload.v2i64(i64*, <2 x i1>, <2 x i64>)
-declare <1 x i64> @llvm.masked.expandload.v1i64(i64*, <1 x i1>, <1 x i64>)
-
-declare <16 x i32> @llvm.masked.expandload.v16i32(i32*, <16 x i1>, <16 x i32>)
-declare <8 x i32> @llvm.masked.expandload.v8i32(i32*, <8 x i1>, <8 x i32>)
-declare <4 x i32> @llvm.masked.expandload.v4i32(i32*, <4 x i1>, <4 x i32>)
-declare <2 x i32> @llvm.masked.expandload.v2i32(i32*, <2 x i1>, <2 x i32>)
-
-declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>)
-declare <16 x i16> @llvm.masked.expandload.v16i16(i16*, <16 x i1>, <16 x i16>)
-declare <8 x i16> @llvm.masked.expandload.v8i16(i16*, <8 x i1>, <8 x i16>)
-declare <4 x i16> @llvm.masked.expandload.v4i16(i16*, <4 x i1>, <4 x i16>)
-
-declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)
-declare <32 x i8> @llvm.masked.expandload.v32i8(i8*, <32 x i1>, <32 x i8>)
-declare <16 x i8> @llvm.masked.expandload.v16i8(i8*, <16 x i1>, <16 x i8>)
-declare <8 x i8> @llvm.masked.expandload.v8i8(i8*, <8 x i1>, <8 x i8>)
-
-declare void @llvm.masked.compressstore.v8f64(<8 x double>, double*, <8 x i1>)
-declare void @llvm.masked.compressstore.v4f64(<4 x double>, double*, <4 x i1>)
-declare void @llvm.masked.compressstore.v2f64(<2 x double>, double*, <2 x i1>)
-declare void @llvm.masked.compressstore.v1f64(<1 x double>, double*, <1 x i1>)
-
-declare void @llvm.masked.compressstore.v16f32(<16 x float>, float*, <16 x i1>)
-declare void @llvm.masked.compressstore.v8f32(<8 x float>, float*, <8 x i1>)
-declare void @llvm.masked.compressstore.v4f32(<4 x float>, float*, <4 x i1>)
-declare void @llvm.masked.compressstore.v2f32(<2 x float>, float*, <2 x i1>)
-
-declare void @llvm.masked.compressstore.v8i64(<8 x i64>, i64*, <8 x i1>)
-declare void @llvm.masked.compressstore.v4i64(<4 x i64>, i64*, <4 x i1>)
-declare void @llvm.masked.compressstore.v2i64(<2 x i64>, i64*, <2 x i1>)
-declare void @llvm.masked.compressstore.v1i64(<1 x i64>, i64*, <1 x i1>)
-
-declare void @llvm.masked.compressstore.v16i32(<16 x i32>, i32*, <16 x i1>)
-declare void @llvm.masked.compressstore.v8i32(<8 x i32>, i32*, <8 x i1>)
-declare void @llvm.masked.compressstore.v4i32(<4 x i32>, i32*, <4 x i1>)
-declare void @llvm.masked.compressstore.v2i32(<2 x i32>, i32*, <2 x i1>)
-
-declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
-declare void @llvm.masked.compressstore.v16i16(<16 x i16>, i16*, <16 x i1>)
-declare void @llvm.masked.compressstore.v8i16(<8 x i16>, i16*, <8 x i1>)
-declare void @llvm.masked.compressstore.v4i16(<4 x i16>, i16*, <4 x i1>)
-
-declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
-declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>)
-declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>)
-declare void @llvm.masked.compressstore.v8i8(<8 x i8>, i8*, <8 x i1>)
+declare <8 x double> @llvm.masked.load.v8f64.p0(ptr, i32, <8 x i1>, <8 x double>)
+declare <7 x double> @llvm.masked.load.v7f64.p0(ptr, i32, <7 x i1>, <7 x double>)
+declare <6 x double> @llvm.masked.load.v6f64.p0(ptr, i32, <6 x i1>, <6 x double>)
+declare <5 x double> @llvm.masked.load.v5f64.p0(ptr, i32, <5 x i1>, <5 x double>)
+declare <4 x double> @llvm.masked.load.v4f64.p0(ptr, i32, <4 x i1>, <4 x double>)
+declare <3 x double> @llvm.masked.load.v3f64.p0(ptr, i32, <3 x i1>, <3 x double>)
+declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>)
+declare <1 x double> @llvm.masked.load.v1f64.p0(ptr, i32, <1 x i1>, <1 x double>)
+
+declare <16 x float> @llvm.masked.load.v16f32.p0(ptr, i32, <16 x i1>, <16 x float>)
+declare <15 x float> @llvm.masked.load.v15f32.p0(ptr, i32, <15 x i1>, <15 x float>)
+declare <14 x float> @llvm.masked.load.v14f32.p0(ptr, i32, <14 x i1>, <14 x float>)
+declare <13 x float> @llvm.masked.load.v13f32.p0(ptr, i32, <13 x i1>, <13 x float>)
+declare <12 x float> @llvm.masked.load.v12f32.p0(ptr, i32, <12 x i1>, <12 x float>)
+declare <11 x float> @llvm.masked.load.v11f32.p0(ptr, i32, <11 x i1>, <11 x float>)
+declare <10 x float> @llvm.masked.load.v10f32.p0(ptr, i32, <10 x i1>, <10 x float>)
+declare <9 x float> @llvm.masked.load.v9f32.p0(ptr, i32, <9 x i1>, <9 x float>)
+declare <8 x float> @llvm.masked.load.v8f32.p0(ptr, i32, <8 x i1>, <8 x float>)
+declare <7 x float> @llvm.masked.load.v7f32.p0(ptr, i32, <7 x i1>, <7 x float>)
+declare <6 x float> @llvm.masked.load.v6f32.p0(ptr, i32, <6 x i1>, <6 x float>)
+declare <5 x float> @llvm.masked.load.v5f32.p0(ptr, i32, <5 x i1>, <5 x float>)
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)
+declare <3 x float> @llvm.masked.load.v3f32.p0(ptr, i32, <3 x i1>, <3 x float>)
+declare <2 x float> @llvm.masked.load.v2f32.p0(ptr, i32, <2 x i1>, <2 x float>)
+declare <1 x float> @llvm.masked.load.v1f32.p0(ptr, i32, <1 x i1>, <1 x float>)
+
+declare <8 x i64> @llvm.masked.load.v8i64.p0(ptr, i32, <8 x i1>, <8 x i64>)
+declare <7 x i64> @llvm.masked.load.v7i64.p0(ptr, i32, <7 x i1>, <7 x i64>)
+declare <6 x i64> @llvm.masked.load.v6i64.p0(ptr, i32, <6 x i1>, <6 x i64>)
+declare <5 x i64> @llvm.masked.load.v5i64.p0(ptr, i32, <5 x i1>, <5 x i64>)
+declare <4 x i64> @llvm.masked.load.v4i64.p0(ptr, i32, <4 x i1>, <4 x i64>)
+declare <3 x i64> @llvm.masked.load.v3i64.p0(ptr, i32, <3 x i1>, <3 x i64>)
+declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>)
+declare <1 x i64> @llvm.masked.load.v1i64.p0(ptr, i32, <1 x i1>, <1 x i64>)
+
+declare <16 x i32> @llvm.masked.load.v16i32.p0(ptr, i32, <16 x i1>, <16 x i32>)
+declare <15 x i32> @llvm.masked.load.v15i32.p0(ptr, i32, <15 x i1>, <15 x i32>)
+declare <14 x i32> @llvm.masked.load.v14i32.p0(ptr, i32, <14 x i1>, <14 x i32>)
+declare <13 x i32> @llvm.masked.load.v13i32.p0(ptr, i32, <13 x i1>, <13 x i32>)
+declare <12 x i32> @llvm.masked.load.v12i32.p0(ptr, i32, <12 x i1>, <12 x i32>)
+declare <11 x i32> @llvm.masked.load.v11i32.p0(ptr, i32, <11 x i1>, <11 x i32>)
+declare <10 x i32> @llvm.masked.load.v10i32.p0(ptr, i32, <10 x i1>, <10 x i32>)
+declare <9 x i32> @llvm.masked.load.v9i32.p0(ptr, i32, <9 x i1>, <9 x i32>)
+declare <8 x i32> @llvm.masked.load.v8i32.p0(ptr, i32, <8 x i1>, <8 x i32>)
+declare <7 x i32> @llvm.masked.load.v7i32.p0(ptr, i32, <7 x i1>, <7 x i32>)
+declare <6 x i32> @llvm.masked.load.v6i32.p0(ptr, i32, <6 x i1>, <6 x i32>)
+declare <5 x i32> @llvm.masked.load.v5i32.p0(ptr, i32, <5 x i1>, <5 x i32>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>)
+declare <3 x i32> @llvm.masked.load.v3i32.p0(ptr, i32, <3 x i1>, <3 x i32>)
+declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>)
+declare <1 x i32> @llvm.masked.load.v1i32.p0(ptr, i32, <1 x i1>, <1 x i32>)
+
+declare <32 x i16> @llvm.masked.load.v32i16.p0(ptr, i32, <32 x i1>, <32 x i16>)
+declare <16 x i16> @llvm.masked.load.v16i16.p0(ptr, i32, <16 x i1>, <16 x i16>)
+declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>)
+declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>)
+
+declare <64 x i8> @llvm.masked.load.v64i8.p0(ptr, i32, <64 x i1>, <64 x i8>)
+declare <32 x i8> @llvm.masked.load.v32i8.p0(ptr, i32, <32 x i1>, <32 x i8>)
+declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>)
+declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>)
+
+declare void @llvm.masked.store.v8f64.p0(<8 x double>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v7f64.p0(<7 x double>, ptr, i32, <7 x i1>)
+declare void @llvm.masked.store.v6f64.p0(<6 x double>, ptr, i32, <6 x i1>)
+declare void @llvm.masked.store.v5f64.p0(<5 x double>, ptr, i32, <5 x i1>)
+declare void @llvm.masked.store.v4f64.p0(<4 x double>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v3f64.p0(<3 x double>, ptr, i32, <3 x i1>)
+declare void @llvm.masked.store.v2f64.p0(<2 x double>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v1f64.p0(<1 x double>, ptr, i32, <1 x i1>)
+
+declare void @llvm.masked.store.v16f32.p0(<16 x float>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v15f32.p0(<15 x float>, ptr, i32, <15 x i1>)
+declare void @llvm.masked.store.v14f32.p0(<14 x float>, ptr, i32, <14 x i1>)
+declare void @llvm.masked.store.v13f32.p0(<13 x float>, ptr, i32, <13 x i1>)
+declare void @llvm.masked.store.v12f32.p0(<12 x float>, ptr, i32, <12 x i1>)
+declare void @llvm.masked.store.v11f32.p0(<11 x float>, ptr, i32, <11 x i1>)
+declare void @llvm.masked.store.v10f32.p0(<10 x float>, ptr, i32, <10 x i1>)
+declare void @llvm.masked.store.v9f32.p0(<9 x float>, ptr, i32, <9 x i1>)
+declare void @llvm.masked.store.v8f32.p0(<8 x float>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr, i32, <7 x i1>)
+declare void @llvm.masked.store.v6f32.p0(<6 x float>, ptr, i32, <6 x i1>)
+declare void @llvm.masked.store.v5f32.p0(<5 x float>, ptr, i32, <5 x i1>)
+declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v3f32.p0(<3 x float>, ptr, i32, <3 x i1>)
+declare void @llvm.masked.store.v2f32.p0(<2 x float>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v1f32.p0(<1 x float>, ptr, i32, <1 x i1>)
+
+declare void @llvm.masked.store.v8i64.p0(<8 x i64>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v7i64.p0(<7 x i64>, ptr, i32, <7 x i1>)
+declare void @llvm.masked.store.v6i64.p0(<6 x i64>, ptr, i32, <6 x i1>)
+declare void @llvm.masked.store.v5i64.p0(<5 x i64>, ptr, i32, <5 x i1>)
+declare void @llvm.masked.store.v4i64.p0(<4 x i64>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v3i64.p0(<3 x i64>, ptr, i32, <3 x i1>)
+declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v1i64.p0(<1 x i64>, ptr, i32, <1 x i1>)
+
+declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v15i32.p0(<15 x i32>, ptr, i32, <15 x i1>)
+declare void @llvm.masked.store.v14i32.p0(<14 x i32>, ptr, i32, <14 x i1>)
+declare void @llvm.masked.store.v13i32.p0(<13 x i32>, ptr, i32, <13 x i1>)
+declare void @llvm.masked.store.v12i32.p0(<12 x i32>, ptr, i32, <12 x i1>)
+declare void @llvm.masked.store.v11i32.p0(<11 x i32>, ptr, i32, <11 x i1>)
+declare void @llvm.masked.store.v10i32.p0(<10 x i32>, ptr, i32, <10 x i1>)
+declare void @llvm.masked.store.v9i32.p0(<9 x i32>, ptr, i32, <9 x i1>)
+declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v7i32.p0(<7 x i32>, ptr, i32, <7 x i1>)
+declare void @llvm.masked.store.v6i32.p0(<6 x i32>, ptr, i32, <6 x i1>)
+declare void @llvm.masked.store.v5i32.p0(<5 x i32>, ptr, i32, <5 x i1>)
+declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v3i32.p0(<3 x i32>, ptr, i32, <3 x i1>)
+declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32, <1 x i1>)
+
+declare void @llvm.masked.store.v32i16.p0(<32 x i16>, ptr, i32, <32 x i1>)
+declare void @llvm.masked.store.v16i16.p0(<16 x i16>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>)
+
+declare void @llvm.masked.store.v64i8.p0(<64 x i8>, ptr, i32, <64 x i1>)
+declare void @llvm.masked.store.v32i8.p0(<32 x i8>, ptr, i32, <32 x i1>)
+declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>)
+
+declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>)
+declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>)
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>)
+declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>)
+
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>)
+declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>)
+declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)
+declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>)
+
+declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
+declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
+declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
+declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>)
+
+declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>)
+declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
+declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
+
+declare <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i16>)
+declare <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i16>)
+declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
+declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
+
+declare <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr>, i32, <64 x i1>, <64 x i8>)
+declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>)
+declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
+declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
+
+declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>)
+
+declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>)
+
+declare void @llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>)
+
+declare void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>)
+
+declare void @llvm.masked.scatter.v32i16.v32p0(<32 x i16>, <32 x ptr>, i32, <32 x i1>)
+declare void @llvm.masked.scatter.v16i16.v16p0(<16 x i16>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
+
+declare void @llvm.masked.scatter.v64i8.v64p0(<64 x i8>, <64 x ptr>, i32, <64 x i1>)
+declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>)
+declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>)
+
+declare <8 x double> @llvm.masked.expandload.v8f64(ptr, <8 x i1>, <8 x double>)
+declare <4 x double> @llvm.masked.expandload.v4f64(ptr, <4 x i1>, <4 x double>)
+declare <2 x double> @llvm.masked.expandload.v2f64(ptr, <2 x i1>, <2 x double>)
+declare <1 x double> @llvm.masked.expandload.v1f64(ptr, <1 x i1>, <1 x double>)
+
+declare <16 x float> @llvm.masked.expandload.v16f32(ptr, <16 x i1>, <16 x float>)
+declare <8 x float> @llvm.masked.expandload.v8f32(ptr, <8 x i1>, <8 x float>)
+declare <4 x float> @llvm.masked.expandload.v4f32(ptr, <4 x i1>, <4 x float>)
+declare <2 x float> @llvm.masked.expandload.v2f32(ptr, <2 x i1>, <2 x float>)
+
+declare <8 x i64> @llvm.masked.expandload.v8i64(ptr, <8 x i1>, <8 x i64>)
+declare <4 x i64> @llvm.masked.expandload.v4i64(ptr, <4 x i1>, <4 x i64>)
+declare <2 x i64> @llvm.masked.expandload.v2i64(ptr, <2 x i1>, <2 x i64>)
+declare <1 x i64> @llvm.masked.expandload.v1i64(ptr, <1 x i1>, <1 x i64>)
+
+declare <16 x i32> @llvm.masked.expandload.v16i32(ptr, <16 x i1>, <16 x i32>)
+declare <8 x i32> @llvm.masked.expandload.v8i32(ptr, <8 x i1>, <8 x i32>)
+declare <4 x i32> @llvm.masked.expandload.v4i32(ptr, <4 x i1>, <4 x i32>)
+declare <2 x i32> @llvm.masked.expandload.v2i32(ptr, <2 x i1>, <2 x i32>)
+
+declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>)
+declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>)
+declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
+declare <4 x i16> @llvm.masked.expandload.v4i16(ptr, <4 x i1>, <4 x i16>)
+
+declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>)
+declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>)
+declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>)
+declare <8 x i8> @llvm.masked.expandload.v8i8(ptr, <8 x i1>, <8 x i8>)
+
+declare void @llvm.masked.compressstore.v8f64(<8 x double>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4f64(<4 x double>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2f64(<2 x double>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v1f64(<1 x double>, ptr, <1 x i1>)
+
+declare void @llvm.masked.compressstore.v16f32(<16 x float>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8f32(<8 x float>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4f32(<4 x float>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2f32(<2 x float>, ptr, <2 x i1>)
+
+declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2i64(<2 x i64>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v1i64(<1 x i64>, ptr, <1 x i1>)
+
+declare void @llvm.masked.compressstore.v16i32(<16 x i32>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2i32(<2 x i32>, ptr, <2 x i1>)
+
+declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4i16(<4 x i16>, ptr, <4 x i1>)
+
+declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>)
+declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8i8(<8 x i8>, ptr, <8 x i1>)
define i32 @masked_load() {
; SSE2-LABEL: 'masked_load'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>* undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>* undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>* undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>* undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>* undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>* undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>* undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>* undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>* undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>* undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>* undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>* undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>* undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>* undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>* undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>* undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>* undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>* undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>* undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>* undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>* undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>* undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>* undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>* undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>* undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>* undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>* undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>* undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SSE42-LABEL: 'masked_load'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>* undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>* undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>* undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>* undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>* undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>* undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>* undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>* undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>* undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>* undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>* undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>* undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>* undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>* undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>* undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>* undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>* undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>* undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>* undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>* undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>* undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>* undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>* undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>* undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>* undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>* undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>* undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>* undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX-LABEL: 'masked_load'
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>* undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>* undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>* undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>* undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>* undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>* undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>* undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>* undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>* undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>* undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>* undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>* undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>* undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>* undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>* undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>* undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>* undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>* undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>* undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>* undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>* undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>* undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>* undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>* undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>* undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>* undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>* undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>* undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; KNL-LABEL: 'masked_load'
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>* undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>* undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>* undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>* undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>* undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>* undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>* undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>* undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>* undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>* undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>* undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>* undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>* undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>* undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>* undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>* undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>* undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>* undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>* undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>* undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>* undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>* undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>* undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>* undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>* undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>* undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>* undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>* undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKX-LABEL: 'masked_load'
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>* undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>* undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>* undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>* undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>* undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>* undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>* undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>* undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>* undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>* undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>* undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>* undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>* undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>* undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>* undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>* undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>* undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>* undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>* undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>* undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>* undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>* undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>* undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>* undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>* undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>* undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>* undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>* undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
- %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>* undef, i32 1, <7 x i1> undef, <7 x double> undef)
- %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>* undef, i32 1, <6 x i1> undef, <6 x double> undef)
- %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>* undef, i32 1, <5 x i1> undef, <5 x double> undef)
- %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
- %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>* undef, i32 1, <3 x i1> undef, <3 x double> undef)
- %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
- %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-
- %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
- %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>* undef, i32 1, <15 x i1> undef, <15 x float> undef)
- %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>* undef, i32 1, <14 x i1> undef, <14 x float> undef)
- %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>* undef, i32 1, <13 x i1> undef, <13 x float> undef)
- %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>* undef, i32 1, <12 x i1> undef, <12 x float> undef)
- %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>* undef, i32 1, <11 x i1> undef, <11 x float> undef)
- %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>* undef, i32 1, <10 x i1> undef, <10 x float> undef)
- %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>* undef, i32 1, <9 x i1> undef, <9 x float> undef)
- %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
- %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* undef, i32 1, <7 x i1> undef, <7 x float> undef)
- %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>* undef, i32 1, <6 x i1> undef, <6 x float> undef)
- %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>* undef, i32 1, <5 x i1> undef, <5 x float> undef)
- %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
- %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>* undef, i32 1, <3 x i1> undef, <3 x float> undef)
- %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
- %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>* undef, i32 1, <1 x i1> undef, <1 x float> undef)
-
- %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
- %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>* undef, i32 1, <7 x i1> undef, <7 x i64> undef)
- %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>* undef, i32 1, <6 x i1> undef, <6 x i64> undef)
- %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>* undef, i32 1, <5 x i1> undef, <5 x i64> undef)
- %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
- %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>* undef, i32 1, <3 x i1> undef, <3 x i64> undef)
- %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
- %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-
- %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
- %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>* undef, i32 1, <15 x i1> undef, <15 x i32> undef)
- %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>* undef, i32 1, <14 x i1> undef, <14 x i32> undef)
- %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>* undef, i32 1, <13 x i1> undef, <13 x i32> undef)
- %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>* undef, i32 1, <12 x i1> undef, <12 x i32> undef)
- %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>* undef, i32 1, <11 x i1> undef, <11 x i32> undef)
- %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>* undef, i32 1, <10 x i1> undef, <10 x i32> undef)
- %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>* undef, i32 1, <9 x i1> undef, <9 x i32> undef)
- %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
- %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>* undef, i32 1, <7 x i1> undef, <7 x i32> undef)
- %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>* undef, i32 1, <6 x i1> undef, <6 x i32> undef)
- %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>* undef, i32 1, <5 x i1> undef, <5 x i32> undef)
- %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
- %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef)
- %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
- %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-
- %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
- %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
- %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
- %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-
- %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef)
- %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef)
- %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef)
- %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+ %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+ %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+ %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+ %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+ %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+
+ %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+ %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+ %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+ %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+ %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+ %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+ %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+ %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+ %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+ %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+ %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+ %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+ %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+
+ %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+ %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+ %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+ %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+ %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+ %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+
+ %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+ %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+ %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+ %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+ %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+ %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+ %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+ %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+ %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+ %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+ %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+ %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+ %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+
+ %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+
+ %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
ret i32 0
}
define i32 @masked_store() {
; SSE2-LABEL: 'masked_store'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v6f64.p0v6f64(<6 x double> undef, <6 x double>* undef, i32 1, <6 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v5f64.p0v5f64(<5 x double> undef, <5 x double>* undef, i32 1, <5 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3f64.p0v3f64(<3 x double> undef, <3 x double>* undef, i32 1, <3 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v15f32.p0v15f32(<15 x float> undef, <15 x float>* undef, i32 1, <15 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v14f32.p0v14f32(<14 x float> undef, <14 x float>* undef, i32 1, <14 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v13f32.p0v13f32(<13 x float> undef, <13 x float>* undef, i32 1, <13 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v12f32.p0v12f32(<12 x float> undef, <12 x float>* undef, i32 1, <12 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v11f32.p0v11f32(<11 x float> undef, <11 x float>* undef, i32 1, <11 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v10f32.p0v10f32(<10 x float> undef, <10 x float>* undef, i32 1, <10 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v9f32.p0v9f32(<9 x float> undef, <9 x float>* undef, i32 1, <9 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> undef, <7 x float>* undef, i32 1, <7 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f32.p0v6f32(<6 x float> undef, <6 x float>* undef, i32 1, <6 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v5f32.p0v5f32(<5 x float> undef, <5 x float>* undef, i32 1, <5 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f32.p0v3f32(<3 x float> undef, <3 x float>* undef, i32 1, <3 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0v1f32(<1 x float> undef, <1 x float>* undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7i64.p0v7i64(<7 x i64> undef, <7 x i64>* undef, i32 1, <7 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6i64.p0v6i64(<6 x i64> undef, <6 x i64>* undef, i32 1, <6 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5i64.p0v5i64(<5 x i64> undef, <5 x i64>* undef, i32 1, <5 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0v3i64(<3 x i64> undef, <3 x i64>* undef, i32 1, <3 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: call void @llvm.masked.store.v15i32.p0v15i32(<15 x i32> undef, <15 x i32>* undef, i32 1, <15 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14i32.p0v14i32(<14 x i32> undef, <14 x i32>* undef, i32 1, <14 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13i32.p0v13i32(<13 x i32> undef, <13 x i32>* undef, i32 1, <13 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12i32.p0v12i32(<12 x i32> undef, <12 x i32>* undef, i32 1, <12 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.store.v11i32.p0v11i32(<11 x i32> undef, <11 x i32>* undef, i32 1, <11 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10i32.p0v10i32(<10 x i32> undef, <10 x i32>* undef, i32 1, <10 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9i32.p0v9i32(<9 x i32> undef, <9 x i32>* undef, i32 1, <9 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7i32.p0v7i32(<7 x i32> undef, <7 x i32>* undef, i32 1, <7 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6i32.p0v6i32(<6 x i32> undef, <6 x i32>* undef, i32 1, <6 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5i32.p0v5i32(<5 x i32> undef, <5 x i32>* undef, i32 1, <5 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SSE42-LABEL: 'masked_store'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v6f64.p0v6f64(<6 x double> undef, <6 x double>* undef, i32 1, <6 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0v5f64(<5 x double> undef, <5 x double>* undef, i32 1, <5 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v3f64.p0v3f64(<3 x double> undef, <3 x double>* undef, i32 1, <3 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v15f32.p0v15f32(<15 x float> undef, <15 x float>* undef, i32 1, <15 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v14f32.p0v14f32(<14 x float> undef, <14 x float>* undef, i32 1, <14 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v13f32.p0v13f32(<13 x float> undef, <13 x float>* undef, i32 1, <13 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v12f32.p0v12f32(<12 x float> undef, <12 x float>* undef, i32 1, <12 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v11f32.p0v11f32(<11 x float> undef, <11 x float>* undef, i32 1, <11 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v10f32.p0v10f32(<10 x float> undef, <10 x float>* undef, i32 1, <10 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v9f32.p0v9f32(<9 x float> undef, <9 x float>* undef, i32 1, <9 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> undef, <7 x float>* undef, i32 1, <7 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v6f32.p0v6f32(<6 x float> undef, <6 x float>* undef, i32 1, <6 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v5f32.p0v5f32(<5 x float> undef, <5 x float>* undef, i32 1, <5 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.store.v3f32.p0v3f32(<3 x float> undef, <3 x float>* undef, i32 1, <3 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0v1f32(<1 x float> undef, <1 x float>* undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i64.p0v7i64(<7 x i64> undef, <7 x i64>* undef, i32 1, <7 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i64.p0v6i64(<6 x i64> undef, <6 x i64>* undef, i32 1, <6 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i64.p0v5i64(<5 x i64> undef, <5 x i64>* undef, i32 1, <5 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i64.p0v3i64(<3 x i64> undef, <3 x i64>* undef, i32 1, <3 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v15i32.p0v15i32(<15 x i32> undef, <15 x i32>* undef, i32 1, <15 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v14i32.p0v14i32(<14 x i32> undef, <14 x i32>* undef, i32 1, <14 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v13i32.p0v13i32(<13 x i32> undef, <13 x i32>* undef, i32 1, <13 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v12i32.p0v12i32(<12 x i32> undef, <12 x i32>* undef, i32 1, <12 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v11i32.p0v11i32(<11 x i32> undef, <11 x i32>* undef, i32 1, <11 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v10i32.p0v10i32(<10 x i32> undef, <10 x i32>* undef, i32 1, <10 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.store.v9i32.p0v9i32(<9 x i32> undef, <9 x i32>* undef, i32 1, <9 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i32.p0v7i32(<7 x i32> undef, <7 x i32>* undef, i32 1, <7 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i32.p0v6i32(<6 x i32> undef, <6 x i32>* undef, i32 1, <6 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i32.p0v5i32(<5 x i32> undef, <5 x i32>* undef, i32 1, <5 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX-LABEL: 'masked_store'
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0v6f64(<6 x double> undef, <6 x double>* undef, i32 1, <6 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0v5f64(<5 x double> undef, <5 x double>* undef, i32 1, <5 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0v3f64(<3 x double> undef, <3 x double>* undef, i32 1, <3 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0v15f32(<15 x float> undef, <15 x float>* undef, i32 1, <15 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0v14f32(<14 x float> undef, <14 x float>* undef, i32 1, <14 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13f32.p0v13f32(<13 x float> undef, <13 x float>* undef, i32 1, <13 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0v12f32(<12 x float> undef, <12 x float>* undef, i32 1, <12 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0v11f32(<11 x float> undef, <11 x float>* undef, i32 1, <11 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0v10f32(<10 x float> undef, <10 x float>* undef, i32 1, <10 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0v9f32(<9 x float> undef, <9 x float>* undef, i32 1, <9 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> undef, <7 x float>* undef, i32 1, <7 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0v6f32(<6 x float> undef, <6 x float>* undef, i32 1, <6 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0v5f32(<5 x float> undef, <5 x float>* undef, i32 1, <5 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0v3f32(<3 x float> undef, <3 x float>* undef, i32 1, <3 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0v1f32(<1 x float> undef, <1 x float>* undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7i64.p0v7i64(<7 x i64> undef, <7 x i64>* undef, i32 1, <7 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0v6i64(<6 x i64> undef, <6 x i64>* undef, i32 1, <6 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0v5i64(<5 x i64> undef, <5 x i64>* undef, i32 1, <5 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0v3i64(<3 x i64> undef, <3 x i64>* undef, i32 1, <3 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15i32.p0v15i32(<15 x i32> undef, <15 x i32>* undef, i32 1, <15 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0v14i32(<14 x i32> undef, <14 x i32>* undef, i32 1, <14 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0v13i32(<13 x i32> undef, <13 x i32>* undef, i32 1, <13 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0v12i32(<12 x i32> undef, <12 x i32>* undef, i32 1, <12 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0v11i32(<11 x i32> undef, <11 x i32>* undef, i32 1, <11 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0v10i32(<10 x i32> undef, <10 x i32>* undef, i32 1, <10 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0v9i32(<9 x i32> undef, <9 x i32>* undef, i32 1, <9 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0v7i32(<7 x i32> undef, <7 x i32>* undef, i32 1, <7 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0v6i32(<6 x i32> undef, <6 x i32>* undef, i32 1, <6 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0v5i32(<5 x i32> undef, <5 x i32>* undef, i32 1, <5 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; KNL-LABEL: 'masked_store'
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0v6f64(<6 x double> undef, <6 x double>* undef, i32 1, <6 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0v5f64(<5 x double> undef, <5 x double>* undef, i32 1, <5 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0v3f64(<3 x double> undef, <3 x double>* undef, i32 1, <3 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0v15f32(<15 x float> undef, <15 x float>* undef, i32 1, <15 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0v14f32(<14 x float> undef, <14 x float>* undef, i32 1, <14 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0v13f32(<13 x float> undef, <13 x float>* undef, i32 1, <13 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0v12f32(<12 x float> undef, <12 x float>* undef, i32 1, <12 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0v11f32(<11 x float> undef, <11 x float>* undef, i32 1, <11 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0v10f32(<10 x float> undef, <10 x float>* undef, i32 1, <10 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0v9f32(<9 x float> undef, <9 x float>* undef, i32 1, <9 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> undef, <7 x float>* undef, i32 1, <7 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0v6f32(<6 x float> undef, <6 x float>* undef, i32 1, <6 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0v5f32(<5 x float> undef, <5 x float>* undef, i32 1, <5 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0v3f32(<3 x float> undef, <3 x float>* undef, i32 1, <3 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0v1f32(<1 x float> undef, <1 x float>* undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0v7i64(<7 x i64> undef, <7 x i64>* undef, i32 1, <7 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0v6i64(<6 x i64> undef, <6 x i64>* undef, i32 1, <6 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0v5i64(<5 x i64> undef, <5 x i64>* undef, i32 1, <5 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0v3i64(<3 x i64> undef, <3 x i64>* undef, i32 1, <3 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0v15i32(<15 x i32> undef, <15 x i32>* undef, i32 1, <15 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0v14i32(<14 x i32> undef, <14 x i32>* undef, i32 1, <14 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0v13i32(<13 x i32> undef, <13 x i32>* undef, i32 1, <13 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0v12i32(<12 x i32> undef, <12 x i32>* undef, i32 1, <12 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0v11i32(<11 x i32> undef, <11 x i32>* undef, i32 1, <11 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0v10i32(<10 x i32> undef, <10 x i32>* undef, i32 1, <10 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0v9i32(<9 x i32> undef, <9 x i32>* undef, i32 1, <9 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0v7i32(<7 x i32> undef, <7 x i32>* undef, i32 1, <7 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0v6i32(<6 x i32> undef, <6 x i32>* undef, i32 1, <6 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0v5i32(<5 x i32> undef, <5 x i32>* undef, i32 1, <5 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKX-LABEL: 'masked_store'
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0v6f64(<6 x double> undef, <6 x double>* undef, i32 1, <6 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0v5f64(<5 x double> undef, <5 x double>* undef, i32 1, <5 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0v3f64(<3 x double> undef, <3 x double>* undef, i32 1, <3 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0v15f32(<15 x float> undef, <15 x float>* undef, i32 1, <15 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0v14f32(<14 x float> undef, <14 x float>* undef, i32 1, <14 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0v13f32(<13 x float> undef, <13 x float>* undef, i32 1, <13 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0v12f32(<12 x float> undef, <12 x float>* undef, i32 1, <12 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0v11f32(<11 x float> undef, <11 x float>* undef, i32 1, <11 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0v10f32(<10 x float> undef, <10 x float>* undef, i32 1, <10 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0v9f32(<9 x float> undef, <9 x float>* undef, i32 1, <9 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> undef, <7 x float>* undef, i32 1, <7 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0v6f32(<6 x float> undef, <6 x float>* undef, i32 1, <6 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0v5f32(<5 x float> undef, <5 x float>* undef, i32 1, <5 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0v3f32(<3 x float> undef, <3 x float>* undef, i32 1, <3 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0v1f32(<1 x float> undef, <1 x float>* undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0v7i64(<7 x i64> undef, <7 x i64>* undef, i32 1, <7 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0v6i64(<6 x i64> undef, <6 x i64>* undef, i32 1, <6 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0v5i64(<5 x i64> undef, <5 x i64>* undef, i32 1, <5 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0v3i64(<3 x i64> undef, <3 x i64>* undef, i32 1, <3 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0v15i32(<15 x i32> undef, <15 x i32>* undef, i32 1, <15 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0v14i32(<14 x i32> undef, <14 x i32>* undef, i32 1, <14 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0v13i32(<13 x i32> undef, <13 x i32>* undef, i32 1, <13 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0v12i32(<12 x i32> undef, <12 x i32>* undef, i32 1, <12 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0v11i32(<11 x i32> undef, <11 x i32>* undef, i32 1, <11 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0v10i32(<10 x i32> undef, <10 x i32>* undef, i32 1, <10 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0v9i32(<9 x i32> undef, <9 x i32>* undef, i32 1, <9 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0v7i32(<7 x i32> undef, <7 x i32>* undef, i32 1, <7 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0v6i32(<6 x i32> undef, <6 x i32>* undef, i32 1, <6 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0v5i32(<5 x i32> undef, <5 x i32>* undef, i32 1, <5 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v7f64.p0v7f64(<7 x double> undef, <7 x double>* undef, i32 1, <7 x i1> undef)
- call void @llvm.masked.store.v6f64.p0v6f64(<6 x double> undef, <6 x double>* undef, i32 1, <6 x i1> undef)
- call void @llvm.masked.store.v5f64.p0v5f64(<5 x double> undef, <5 x double>* undef, i32 1, <5 x i1> undef)
- call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v3f64.p0v3f64(<3 x double> undef, <3 x double>* undef, i32 1, <3 x i1> undef)
- call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v15f32.p0v15f32(<15 x float> undef, <15 x float>* undef, i32 1, <15 x i1> undef)
- call void @llvm.masked.store.v14f32.p0v14f32(<14 x float> undef, <14 x float>* undef, i32 1, <14 x i1> undef)
- call void @llvm.masked.store.v13f32.p0v13f32(<13 x float> undef, <13 x float>* undef, i32 1, <13 x i1> undef)
- call void @llvm.masked.store.v12f32.p0v12f32(<12 x float> undef, <12 x float>* undef, i32 1, <12 x i1> undef)
- call void @llvm.masked.store.v11f32.p0v11f32(<11 x float> undef, <11 x float>* undef, i32 1, <11 x i1> undef)
- call void @llvm.masked.store.v10f32.p0v10f32(<10 x float> undef, <10 x float>* undef, i32 1, <10 x i1> undef)
- call void @llvm.masked.store.v9f32.p0v9f32(<9 x float> undef, <9 x float>* undef, i32 1, <9 x i1> undef)
- call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> undef, <7 x float>* undef, i32 1, <7 x i1> undef)
- call void @llvm.masked.store.v6f32.p0v6f32(<6 x float> undef, <6 x float>* undef, i32 1, <6 x i1> undef)
- call void @llvm.masked.store.v5f32.p0v5f32(<5 x float> undef, <5 x float>* undef, i32 1, <5 x i1> undef)
- call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v3f32.p0v3f32(<3 x float> undef, <3 x float>* undef, i32 1, <3 x i1> undef)
- call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v1f32.p0v1f32(<1 x float> undef, <1 x float>* undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v7i64.p0v7i64(<7 x i64> undef, <7 x i64>* undef, i32 1, <7 x i1> undef)
- call void @llvm.masked.store.v6i64.p0v6i64(<6 x i64> undef, <6 x i64>* undef, i32 1, <6 x i1> undef)
- call void @llvm.masked.store.v5i64.p0v5i64(<5 x i64> undef, <5 x i64>* undef, i32 1, <5 x i1> undef)
- call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v3i64.p0v3i64(<3 x i64> undef, <3 x i64>* undef, i32 1, <3 x i1> undef)
- call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v15i32.p0v15i32(<15 x i32> undef, <15 x i32>* undef, i32 1, <15 x i1> undef)
- call void @llvm.masked.store.v14i32.p0v14i32(<14 x i32> undef, <14 x i32>* undef, i32 1, <14 x i1> undef)
- call void @llvm.masked.store.v13i32.p0v13i32(<13 x i32> undef, <13 x i32>* undef, i32 1, <13 x i1> undef)
- call void @llvm.masked.store.v12i32.p0v12i32(<12 x i32> undef, <12 x i32>* undef, i32 1, <12 x i1> undef)
- call void @llvm.masked.store.v11i32.p0v11i32(<11 x i32> undef, <11 x i32>* undef, i32 1, <11 x i1> undef)
- call void @llvm.masked.store.v10i32.p0v10i32(<10 x i32> undef, <10 x i32>* undef, i32 1, <10 x i1> undef)
- call void @llvm.masked.store.v9i32.p0v9i32(<9 x i32> undef, <9 x i32>* undef, i32 1, <9 x i1> undef)
- call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v7i32.p0v7i32(<7 x i32> undef, <7 x i32>* undef, i32 1, <7 x i1> undef)
- call void @llvm.masked.store.v6i32.p0v6i32(<6 x i32> undef, <6 x i32>* undef, i32 1, <6 x i1> undef)
- call void @llvm.masked.store.v5i32.p0v5i32(<5 x i32> undef, <5 x i32>* undef, i32 1, <5 x i1> undef)
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef)
- call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef)
-
- call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef)
- call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+ call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+ call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+ call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
+ call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
+
+ call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
+ call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+ call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+ call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+ call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+ call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+ call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+ call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
+ call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
+ call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
+ call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
+ call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
+
+ call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
+ call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
+ call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
+ call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
+ call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
+
+ call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
+ call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
+ call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
+ call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
+ call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
+ call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
+ call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
+ call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
+ call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
+ call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
+ call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
+
+ call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
+ call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
+
+ call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
+ call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
+ call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
ret i32 0
}
define i32 @masked_gather() {
; SSE2-LABEL: 'masked_gather'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SSE42-LABEL: 'masked_gather'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX1-LABEL: 'masked_gather'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX2-LABEL: 'masked_gather'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKL-LABEL: 'masked_gather'
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; KNL-LABEL: 'masked_gather'
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKX-LABEL: 'masked_gather'
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef)
- %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef)
- %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef)
- %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+ %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
- %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef)
- %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef)
- %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef)
- %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+ %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
- %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
- %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
- %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
- %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+ %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+ %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
- %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
- %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
- %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
- %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+ %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
- %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
- %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
- %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
- %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+ %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
- %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
- %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
- %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
- %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+ %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
ret i32 0
}
define i32 @masked_scatter() {
; SSE2-LABEL: 'masked_scatter'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 316 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 316 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SSE42-LABEL: 'masked_scatter'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX-LABEL: 'masked_scatter'
-; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 210 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 105 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 210 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 105 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; KNL-LABEL: 'masked_scatter'
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKX-LABEL: 'masked_scatter'
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef)
+ call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
- call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef)
+ call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
- call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef)
+ call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+ call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef)
- call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
+ call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+ call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+ call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
ret i32 0
}
define i32 @masked_expandload() {
; SSE2-LABEL: 'masked_expandload'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SSE42-LABEL: 'masked_expandload'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX-LABEL: 'masked_expandload'
-; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX512-LABEL: 'masked_expandload'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(double* undef, <8 x i1> undef, <8 x double> undef)
- %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef)
- %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef)
- %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef)
+ %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef)
- %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef)
- %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef)
- %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef)
- %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef)
+ %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef)
- %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef)
- %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef)
- %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef)
- %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef)
+ %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef)
+ %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef)
- %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef)
- %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef)
- %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef)
- %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef)
+ %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef)
- %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef)
- %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef)
- %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef)
- %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef)
+ %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef)
- %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef)
- %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef)
- %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef)
- %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef)
+ %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef)
ret i32 0
}
define i32 @masked_compressstore() {
; SSE2-LABEL: 'masked_compressstore'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SSE42-LABEL: 'masked_compressstore'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX1-LABEL: 'masked_compressstore'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX2-LABEL: 'masked_compressstore'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKL-LABEL: 'masked_compressstore'
-; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX512-LABEL: 'masked_compressstore'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef)
- call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef)
- call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef)
- call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef)
+ call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
+ call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
+ call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
+ call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
- call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef)
- call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef)
- call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef)
- call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef)
+ call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
+ call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
+ call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
+ call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
- call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef)
- call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef)
- call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef)
- call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef)
+ call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
+ call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
+ call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
+ call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
- call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef)
- call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef)
- call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef)
- call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef)
+ call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
+ call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
+ call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
+ call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
- call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef)
- call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef)
- call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef)
- call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef)
+ call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
+ call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
+ call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
+ call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
- call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef)
- call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef)
- call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef)
- call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef)
+ call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
+ call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
+ call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
+ call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
ret i32 0
}
-define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
+define <2 x double> @test1(<2 x i64> %trigger, ptr %addr, <2 x double> %dst) {
; SSE2-LABEL: 'test1'
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; SSE42-LABEL: 'test1'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; AVX-LABEL: 'test1'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; AVX512-LABEL: 'test1'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
%mask = icmp eq <2 x i64> %trigger, zeroinitializer
- %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
ret <2 x double> %res
}
-define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
+define <4 x i32> @test2(<4 x i32> %trigger, ptr %addr, <4 x i32> %dst) {
; SSE2-LABEL: 'test2'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SSE42-LABEL: 'test2'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; AVX-LABEL: 'test2'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; AVX512-LABEL: 'test2'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
- %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
+ %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
ret <4 x i32> %res
}
-define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
+define void @test3(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) {
; SSE2-LABEL: 'test3'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test3'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX-LABEL: 'test3'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test3'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
- call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32>%val, ptr %addr, i32 4, <4 x i1>%mask)
ret void
}
-define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
+define <8 x float> @test4(<8 x i32> %trigger, ptr %addr, <8 x float> %dst) {
; SSE2-LABEL: 'test4'
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;
; SSE42-LABEL: 'test4'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;
; AVX1-LABEL: 'test4'
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;
; AVX2-LABEL: 'test4'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;
; SKL-LABEL: 'test4'
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;
; AVX512-LABEL: 'test4'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
;
%mask = icmp eq <8 x i32> %trigger, zeroinitializer
- %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
+ %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
ret <8 x float> %res
}
-define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
+define void @test5(<2 x i32> %trigger, ptr %addr, <2 x float> %val) {
; SSE2-LABEL: 'test5'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test5'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX-LABEL: 'test5'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test5'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
- call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
+ call void @llvm.masked.store.v2f32.p0(<2 x float>%val, ptr %addr, i32 4, <2 x i1>%mask)
ret void
}
-define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
+define void @test6(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) {
; SSE2-LABEL: 'test6'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test6'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX-LABEL: 'test6'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test6'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
- call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
+ call void @llvm.masked.store.v2i32.p0(<2 x i32>%val, ptr %addr, i32 4, <2 x i1>%mask)
ret void
}
-define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
+define <2 x float> @test7(<2 x i32> %trigger, ptr %addr, <2 x float> %dst) {
; SSE2-LABEL: 'test7'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
;
; SSE42-LABEL: 'test7'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
;
; AVX-LABEL: 'test7'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
;
; AVX512-LABEL: 'test7'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
;
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
- %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
+ %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
ret <2 x float> %res
}
-define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
+define <2 x i32> @test8(<2 x i32> %trigger, ptr %addr, <2 x i32> %dst) {
; SSE2-LABEL: 'test8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
;
; SSE42-LABEL: 'test8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
;
; AVX-LABEL: 'test8'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
;
; AVX512-LABEL: 'test8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
;
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
- %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
+ %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
ret <2 x i32> %res
}
-define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0) {
+define <2 x double> @test_gather_2f64(<2 x ptr> %ptrs, <2 x i1> %mask, <2 x double> %src0) {
; SSE2-LABEL: 'test_gather_2f64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; SSE42-LABEL: 'test_gather_2f64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; AVX1-LABEL: 'test_gather_2f64'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; AVX2-LABEL: 'test_gather_2f64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; SKL-LABEL: 'test_gather_2f64'
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
; AVX512-LABEL: 'test_gather_2f64'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
;
- %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+ %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
ret <2 x double> %res
}
-define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) {
+define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %src0) {
; SSE2-LABEL: 'test_gather_4i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SSE42-LABEL: 'test_gather_4i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; AVX1-LABEL: 'test_gather_4i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; AVX2-LABEL: 'test_gather_4i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SKL-LABEL: 'test_gather_4i32'
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; KNL-LABEL: 'test_gather_4i32'
-; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SKX-LABEL: 'test_gather_4i32'
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
- %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
ret <4 x i32> %res
}
-define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) {
+define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) {
; SSE2-LABEL: 'test_gather_4i32_const_mask'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SSE42-LABEL: 'test_gather_4i32_const_mask'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; AVX1-LABEL: 'test_gather_4i32_const_mask'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; AVX2-LABEL: 'test_gather_4i32_const_mask'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SKL-LABEL: 'test_gather_4i32_const_mask'
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; KNL-LABEL: 'test_gather_4i32_const_mask'
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
; SKX-LABEL: 'test_gather_4i32_const_mask'
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
;
- %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
ret <4 x i32> %res
}
-define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) {
+define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) {
; SSE2-LABEL: 'test_gather_16f32_const_mask'
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SSE42-LABEL: 'test_gather_16f32_const_mask'
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX1-LABEL: 'test_gather_16f32_const_mask'
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX2-LABEL: 'test_gather_16f32_const_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKL-LABEL: 'test_gather_16f32_const_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX512-LABEL: 'test_gather_16f32_const_mask'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
%sext_ind = sext <16 x i32> %ind to <16 x i64>
- %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
+ %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
- %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
ret <16 x float>%res
}
-define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) {
+define <16 x float> @test_gather_16f32_var_mask(ptr %base, <16 x i32> %ind, <16 x i1>%mask) {
; SSE2-LABEL: 'test_gather_16f32_var_mask'
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SSE42-LABEL: 'test_gather_16f32_var_mask'
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX1-LABEL: 'test_gather_16f32_var_mask'
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX2-LABEL: 'test_gather_16f32_var_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKL-LABEL: 'test_gather_16f32_var_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX512-LABEL: 'test_gather_16f32_var_mask'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
-; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
%sext_ind = sext <16 x i32> %ind to <16 x i64>
- %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
+ %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
- %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
ret <16 x float>%res
}
-define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {
+define <16 x float> @test_gather_16f32_ra_var_mask(<16 x ptr> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {
; SSE2-LABEL: 'test_gather_16f32_ra_var_mask'
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SSE42-LABEL: 'test_gather_16f32_ra_var_mask'
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX1-LABEL: 'test_gather_16f32_ra_var_mask'
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX2-LABEL: 'test_gather_16f32_ra_var_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKL-LABEL: 'test_gather_16f32_ra_var_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX512-LABEL: 'test_gather_16f32_ra_var_mask'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
-; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
%sext_ind = sext <16 x i32> %ind to <16 x i64>
- %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
+ %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
- %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
ret <16 x float>%res
}
-define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) {
+define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
; SSE2-LABEL: 'test_gather_16f32_const_mask2'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
-; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SSE42-LABEL: 'test_gather_16f32_const_mask2'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX1-LABEL: 'test_gather_16f32_const_mask2'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX2-LABEL: 'test_gather_16f32_const_mask2'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; SKL-LABEL: 'test_gather_16f32_const_mask2'
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
; AVX512-LABEL: 'test_gather_16f32_const_mask2'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
-; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res
;
- %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
- %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+ %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+ %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
%sext_ind = sext <16 x i32> %ind to <16 x i64>
- %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
+ %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
- %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
ret <16 x float>%res
}
-define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
+define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
; SSE2-LABEL: 'test_scatter_16i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test_scatter_16i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'test_scatter_16i32'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'test_scatter_16i32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKL-LABEL: 'test_scatter_16i32'
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test_scatter_16i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
- %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
+ %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+ %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
- %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+ %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
%imask = bitcast i16 %mask to <16 x i1>
- call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+ call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>%val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask)
ret void
}
-define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
+define void @test_scatter_8i32(<8 x i32>%a1, <8 x ptr> %ptr, <8 x i1>%mask) {
; SSE2-LABEL: 'test_scatter_8i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test_scatter_8i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX-LABEL: 'test_scatter_8i32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'test_scatter_8i32'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+ call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask)
ret void
}
-define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
+define void @test_scatter_4i32(<4 x i32>%a1, <4 x ptr> %ptr, <4 x i1>%mask) {
; SSE2-LABEL: 'test_scatter_4i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE42-LABEL: 'test_scatter_4i32'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX-LABEL: 'test_scatter_4i32'
-; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; KNL-LABEL: 'test_scatter_4i32'
-; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKX-LABEL: 'test_scatter_4i32'
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+ call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask)
ret void
}
-define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) {
+define <4 x float> @test_gather_4f32(ptr %ptr, <4 x i32> %ind, <4 x i1>%mask) {
; SSE2-LABEL: 'test_gather_4f32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SSE42-LABEL: 'test_gather_4f32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; AVX1-LABEL: 'test_gather_4f32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; AVX2-LABEL: 'test_gather_4f32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SKL-LABEL: 'test_gather_4f32'
; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; KNL-LABEL: 'test_gather_4f32'
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SKX-LABEL: 'test_gather_4f32'
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
%sext_ind = sext <4 x i32> %ind to <4 x i64>
- %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
+ %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
- %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+ %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
ret <4 x float>%res
}
-define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) {
+define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) {
; SSE2-LABEL: 'test_gather_4f32_const_mask'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SSE42-LABEL: 'test_gather_4f32_const_mask'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; AVX1-LABEL: 'test_gather_4f32_const_mask'
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; AVX2-LABEL: 'test_gather_4f32_const_mask'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SKL-LABEL: 'test_gather_4f32_const_mask'
; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; KNL-LABEL: 'test_gather_4f32_const_mask'
; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
; SKX-LABEL: 'test_gather_4f32_const_mask'
; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
-; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res
;
%sext_ind = sext <4 x i32> %ind to <4 x i64>
- %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
+ %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
- %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+ %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
ret <4 x float>%res
}
-declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
-declare <7 x double> @llvm.masked.load.v7f64.p0v7f64(<7 x double>*, i32, <7 x i1>, <7 x double>)
-declare <6 x double> @llvm.masked.load.v6f64.p0v6f64(<6 x double>*, i32, <6 x i1>, <6 x double>)
-declare <5 x double> @llvm.masked.load.v5f64.p0v5f64(<5 x double>*, i32, <5 x i1>, <5 x double>)
-declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
-declare <3 x double> @llvm.masked.load.v3f64.p0v3f64(<3 x double>*, i32, <3 x i1>, <3 x double>)
-declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
-declare <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>*, i32, <1 x i1>, <1 x double>)
-
-declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
-declare <15 x float> @llvm.masked.load.v15f32.p0v15f32(<15 x float>*, i32, <15 x i1>, <15 x float>)
-declare <14 x float> @llvm.masked.load.v14f32.p0v14f32(<14 x float>*, i32, <14 x i1>, <14 x float>)
-declare <13 x float> @llvm.masked.load.v13f32.p0v13f32(<13 x float>*, i32, <13 x i1>, <13 x float>)
-declare <12 x float> @llvm.masked.load.v12f32.p0v12f32(<12 x float>*, i32, <12 x i1>, <12 x float>)
-declare <11 x float> @llvm.masked.load.v11f32.p0v11f32(<11 x float>*, i32, <11 x i1>, <11 x float>)
-declare <10 x float> @llvm.masked.load.v10f32.p0v10f32(<10 x float>*, i32, <10 x i1>, <10 x float>)
-declare <9 x float> @llvm.masked.load.v9f32.p0v9f32(<9 x float>*, i32, <9 x i1>, <9 x float>)
-declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
-declare <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>*, i32, <7 x i1>, <7 x float>)
-declare <6 x float> @llvm.masked.load.v6f32.p0v6f32(<6 x float>*, i32, <6 x i1>, <6 x float>)
-declare <5 x float> @llvm.masked.load.v5f32.p0v5f32(<5 x float>*, i32, <5 x i1>, <5 x float>)
-declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
-declare <3 x float> @llvm.masked.load.v3f32.p0v3f32(<3 x float>*, i32, <3 x i1>, <3 x float>)
-declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
-declare <1 x float> @llvm.masked.load.v1f32.p0v1f32(<1 x float>*, i32, <1 x i1>, <1 x float>)
-
-declare <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>*, i32, <8 x i1>, <8 x i64>)
-declare <7 x i64> @llvm.masked.load.v7i64.p0v7i64(<7 x i64>*, i32, <7 x i1>, <7 x i64>)
-declare <6 x i64> @llvm.masked.load.v6i64.p0v6i64(<6 x i64>*, i32, <6 x i1>, <6 x i64>)
-declare <5 x i64> @llvm.masked.load.v5i64.p0v5i64(<5 x i64>*, i32, <5 x i1>, <5 x i64>)
-declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>)
-declare <3 x i64> @llvm.masked.load.v3i64.p0v3i64(<3 x i64>*, i32, <3 x i1>, <3 x i64>)
-declare <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>*, i32, <2 x i1>, <2 x i64>)
-declare <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>*, i32, <1 x i1>, <1 x i64>)
-
-declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
-declare <15 x i32> @llvm.masked.load.v15i32.p0v15i32(<15 x i32>*, i32, <15 x i1>, <15 x i32>)
-declare <14 x i32> @llvm.masked.load.v14i32.p0v14i32(<14 x i32>*, i32, <14 x i1>, <14 x i32>)
-declare <13 x i32> @llvm.masked.load.v13i32.p0v13i32(<13 x i32>*, i32, <13 x i1>, <13 x i32>)
-declare <12 x i32> @llvm.masked.load.v12i32.p0v12i32(<12 x i32>*, i32, <12 x i1>, <12 x i32>)
-declare <11 x i32> @llvm.masked.load.v11i32.p0v11i32(<11 x i32>*, i32, <11 x i1>, <11 x i32>)
-declare <10 x i32> @llvm.masked.load.v10i32.p0v10i32(<10 x i32>*, i32, <10 x i1>, <10 x i32>)
-declare <9 x i32> @llvm.masked.load.v9i32.p0v9i32(<9 x i32>*, i32, <9 x i1>, <9 x i32>)
-declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
-declare <7 x i32> @llvm.masked.load.v7i32.p0v7i32(<7 x i32>*, i32, <7 x i1>, <7 x i32>)
-declare <6 x i32> @llvm.masked.load.v6i32.p0v6i32(<6 x i32>*, i32, <6 x i1>, <6 x i32>)
-declare <5 x i32> @llvm.masked.load.v5i32.p0v5i32(<5 x i32>*, i32, <5 x i1>, <5 x i32>)
-declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
-declare <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>*, i32, <3 x i1>, <3 x i32>)
-declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
-declare <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>*, i32, <1 x i1>, <1 x i32>)
-
-declare <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>*, i32, <32 x i1>, <32 x i16>)
-declare <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)
-declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
-declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>)
-
-declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)
-declare <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)
-declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
-declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>)
-
-declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v7f64.p0v7f64(<7 x double>, <7 x double>*, i32, <7 x i1>)
-declare void @llvm.masked.store.v6f64.p0v6f64(<6 x double>, <6 x double>*, i32, <6 x i1>)
-declare void @llvm.masked.store.v5f64.p0v5f64(<5 x double>, <5 x double>*, i32, <5 x i1>)
-declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)
-declare void @llvm.masked.store.v3f64.p0v3f64(<3 x double>, <3 x double>*, i32, <3 x i1>)
-declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
-declare void @llvm.masked.store.v1f64.p0v1f64(<1 x double>, <1 x double>*, i32, <1 x i1>)
-
-declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v15f32.p0v15f32(<15 x float>, <15 x float>*, i32, <15 x i1>)
-declare void @llvm.masked.store.v14f32.p0v14f32(<14 x float>, <14 x float>*, i32, <14 x i1>)
-declare void @llvm.masked.store.v13f32.p0v13f32(<13 x float>, <13 x float>*, i32, <13 x i1>)
-declare void @llvm.masked.store.v12f32.p0v12f32(<12 x float>, <12 x float>*, i32, <12 x i1>)
-declare void @llvm.masked.store.v11f32.p0v11f32(<11 x float>, <11 x float>*, i32, <11 x i1>)
-declare void @llvm.masked.store.v10f32.p0v10f32(<10 x float>, <10 x float>*, i32, <10 x i1>)
-declare void @llvm.masked.store.v9f32.p0v9f32(<9 x float>, <9 x float>*, i32, <9 x i1>)
-declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v7f32.p0v7f32(<7 x float>, <7 x float>*, i32, <7 x i1>)
-declare void @llvm.masked.store.v6f32.p0v6f32(<6 x float>, <6 x float>*, i32, <6 x i1>)
-declare void @llvm.masked.store.v5f32.p0v5f32(<5 x float>, <5 x float>*, i32, <5 x i1>)
-declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
-declare void @llvm.masked.store.v3f32.p0v3f32(<3 x float>, <3 x float>*, i32, <3 x i1>)
-declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
-declare void @llvm.masked.store.v1f32.p0v1f32(<1 x float>, <1 x float>*, i32, <1 x i1>)
-
-declare void @llvm.masked.store.v8i64.p0v8i64(<8 x i64>, <8 x i64>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v7i64.p0v7i64(<7 x i64>, <7 x i64>*, i32, <7 x i1>)
-declare void @llvm.masked.store.v6i64.p0v6i64(<6 x i64>, <6 x i64>*, i32, <6 x i1>)
-declare void @llvm.masked.store.v5i64.p0v5i64(<5 x i64>, <5 x i64>*, i32, <5 x i1>)
-declare void @llvm.masked.store.v4i64.p0v4i64(<4 x i64>, <4 x i64>*, i32, <4 x i1>)
-declare void @llvm.masked.store.v3i64.p0v3i64(<3 x i64>, <3 x i64>*, i32, <3 x i1>)
-declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
-declare void @llvm.masked.store.v1i64.p0v1i64(<1 x i64>, <1 x i64>*, i32, <1 x i1>)
-
-declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v15i32.p0v15i32(<15 x i32>, <15 x i32>*, i32, <15 x i1>)
-declare void @llvm.masked.store.v14i32.p0v14i32(<14 x i32>, <14 x i32>*, i32, <14 x i1>)
-declare void @llvm.masked.store.v13i32.p0v13i32(<13 x i32>, <13 x i32>*, i32, <13 x i1>)
-declare void @llvm.masked.store.v12i32.p0v12i32(<12 x i32>, <12 x i32>*, i32, <12 x i1>)
-declare void @llvm.masked.store.v11i32.p0v11i32(<11 x i32>, <11 x i32>*, i32, <11 x i1>)
-declare void @llvm.masked.store.v10i32.p0v10i32(<10 x i32>, <10 x i32>*, i32, <10 x i1>)
-declare void @llvm.masked.store.v9i32.p0v9i32(<9 x i32>, <9 x i32>*, i32, <9 x i1>)
-declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v7i32.p0v7i32(<7 x i32>, <7 x i32>*, i32, <7 x i1>)
-declare void @llvm.masked.store.v6i32.p0v6i32(<6 x i32>, <6 x i32>*, i32, <6 x i1>)
-declare void @llvm.masked.store.v5i32.p0v5i32(<5 x i32>, <5 x i32>*, i32, <5 x i1>)
-declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
-declare void @llvm.masked.store.v3i32.p0v3i32(<3 x i32>, <3 x i32>*, i32, <3 x i1>)
-declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
-declare void @llvm.masked.store.v1i32.p0v1i32(<1 x i32>, <1 x i32>*, i32, <1 x i1>)
-
-declare void @llvm.masked.store.v32i16.p0v32i16(<32 x i16>, <32 x i16>*, i32, <32 x i1>)
-declare void @llvm.masked.store.v16i16.p0v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
-declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>)
-
-declare void @llvm.masked.store.v64i8.p0v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>)
-declare void @llvm.masked.store.v32i8.p0v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
-declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
-declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>)
-
-declare <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*>, i32, <8 x i1>, <8 x double>)
-declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
-declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
-declare <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*>, i32, <1 x i1>, <1 x double>)
-
-declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
-declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>)
-declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
-declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>)
-
-declare <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>)
-declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
-declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
-declare <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*>, i32, <1 x i1>, <1 x i64>)
-
-declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
-declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)
-declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
-declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
-
-declare <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*>, i32, <32 x i1>, <32 x i16>)
-declare <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*>, i32, <16 x i1>, <16 x i16>)
-declare <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*>, i32, <8 x i1>, <8 x i16>)
-declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>)
-
-declare <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*>, i32, <64 x i1>, <64 x i8>)
-declare <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*>, i32, <32 x i1>, <32 x i8>)
-declare <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>)
-declare <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*>, i32, <8 x i1>, <8 x i8>)
-
-declare void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double>, <8 x double*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double>, <1 x double*>, i32, <1 x i1>)
-
-declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float>, <2 x float*>, i32, <2 x i1>)
-
-declare void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64>, <8 x i64*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64>, <4 x i64*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64>, <2 x i64*>, i32, <2 x i1>)
-declare void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64>, <1 x i64*>, i32, <1 x i1>)
-
-declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>, <16 x i32*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
-declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32>, <2 x i32*>, i32, <2 x i1>)
-
-declare void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16>, <32 x i16*>, i32, <32 x i1>)
-declare void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16>, <16 x i16*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16>, <8 x i16*>, i32, <8 x i1>)
-declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32, <4 x i1>)
-
-declare void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8>, <64 x i8*>, i32, <64 x i1>)
-declare void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8>, <32 x i8*>, i32, <32 x i1>)
-declare void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8>, <16 x i8*>, i32, <16 x i1>)
-declare void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8>, <8 x i8*>, i32, <8 x i1>)
-
-declare <8 x double> @llvm.masked.expandload.v8f64(double*, <8 x i1>, <8 x double>)
-declare <4 x double> @llvm.masked.expandload.v4f64(double*, <4 x i1>, <4 x double>)
-declare <2 x double> @llvm.masked.expandload.v2f64(double*, <2 x i1>, <2 x double>)
-declare <1 x double> @llvm.masked.expandload.v1f64(double*, <1 x i1>, <1 x double>)
-
-declare <16 x float> @llvm.masked.expandload.v16f32(float*, <16 x i1>, <16 x float>)
-declare <8 x float> @llvm.masked.expandload.v8f32(float*, <8 x i1>, <8 x float>)
-declare <4 x float> @llvm.masked.expandload.v4f32(float*, <4 x i1>, <4 x float>)
-declare <2 x float> @llvm.masked.expandload.v2f32(float*, <2 x i1>, <2 x float>)
-
-declare <8 x i64> @llvm.masked.expandload.v8i64(i64*, <8 x i1>, <8 x i64>)
-declare <4 x i64> @llvm.masked.expandload.v4i64(i64*, <4 x i1>, <4 x i64>)
-declare <2 x i64> @llvm.masked.expandload.v2i64(i64*, <2 x i1>, <2 x i64>)
-declare <1 x i64> @llvm.masked.expandload.v1i64(i64*, <1 x i1>, <1 x i64>)
-
-declare <16 x i32> @llvm.masked.expandload.v16i32(i32*, <16 x i1>, <16 x i32>)
-declare <8 x i32> @llvm.masked.expandload.v8i32(i32*, <8 x i1>, <8 x i32>)
-declare <4 x i32> @llvm.masked.expandload.v4i32(i32*, <4 x i1>, <4 x i32>)
-declare <2 x i32> @llvm.masked.expandload.v2i32(i32*, <2 x i1>, <2 x i32>)
-
-declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>)
-declare <16 x i16> @llvm.masked.expandload.v16i16(i16*, <16 x i1>, <16 x i16>)
-declare <8 x i16> @llvm.masked.expandload.v8i16(i16*, <8 x i1>, <8 x i16>)
-declare <4 x i16> @llvm.masked.expandload.v4i16(i16*, <4 x i1>, <4 x i16>)
-
-declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)
-declare <32 x i8> @llvm.masked.expandload.v32i8(i8*, <32 x i1>, <32 x i8>)
-declare <16 x i8> @llvm.masked.expandload.v16i8(i8*, <16 x i1>, <16 x i8>)
-declare <8 x i8> @llvm.masked.expandload.v8i8(i8*, <8 x i1>, <8 x i8>)
-
-declare void @llvm.masked.compressstore.v8f64(<8 x double>, double*, <8 x i1>)
-declare void @llvm.masked.compressstore.v4f64(<4 x double>, double*, <4 x i1>)
-declare void @llvm.masked.compressstore.v2f64(<2 x double>, double*, <2 x i1>)
-declare void @llvm.masked.compressstore.v1f64(<1 x double>, double*, <1 x i1>)
-
-declare void @llvm.masked.compressstore.v16f32(<16 x float>, float*, <16 x i1>)
-declare void @llvm.masked.compressstore.v8f32(<8 x float>, float*, <8 x i1>)
-declare void @llvm.masked.compressstore.v4f32(<4 x float>, float*, <4 x i1>)
-declare void @llvm.masked.compressstore.v2f32(<2 x float>, float*, <2 x i1>)
-
-declare void @llvm.masked.compressstore.v8i64(<8 x i64>, i64*, <8 x i1>)
-declare void @llvm.masked.compressstore.v4i64(<4 x i64>, i64*, <4 x i1>)
-declare void @llvm.masked.compressstore.v2i64(<2 x i64>, i64*, <2 x i1>)
-declare void @llvm.masked.compressstore.v1i64(<1 x i64>, i64*, <1 x i1>)
-
-declare void @llvm.masked.compressstore.v16i32(<16 x i32>, i32*, <16 x i1>)
-declare void @llvm.masked.compressstore.v8i32(<8 x i32>, i32*, <8 x i1>)
-declare void @llvm.masked.compressstore.v4i32(<4 x i32>, i32*, <4 x i1>)
-declare void @llvm.masked.compressstore.v2i32(<2 x i32>, i32*, <2 x i1>)
-
-declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
-declare void @llvm.masked.compressstore.v16i16(<16 x i16>, i16*, <16 x i1>)
-declare void @llvm.masked.compressstore.v8i16(<8 x i16>, i16*, <8 x i1>)
-declare void @llvm.masked.compressstore.v4i16(<4 x i16>, i16*, <4 x i1>)
-
-declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
-declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>)
-declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>)
-declare void @llvm.masked.compressstore.v8i8(<8 x i8>, i8*, <8 x i1>)
+declare <8 x double> @llvm.masked.load.v8f64.p0(ptr, i32, <8 x i1>, <8 x double>)
+declare <7 x double> @llvm.masked.load.v7f64.p0(ptr, i32, <7 x i1>, <7 x double>)
+declare <6 x double> @llvm.masked.load.v6f64.p0(ptr, i32, <6 x i1>, <6 x double>)
+declare <5 x double> @llvm.masked.load.v5f64.p0(ptr, i32, <5 x i1>, <5 x double>)
+declare <4 x double> @llvm.masked.load.v4f64.p0(ptr, i32, <4 x i1>, <4 x double>)
+declare <3 x double> @llvm.masked.load.v3f64.p0(ptr, i32, <3 x i1>, <3 x double>)
+declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>)
+declare <1 x double> @llvm.masked.load.v1f64.p0(ptr, i32, <1 x i1>, <1 x double>)
+
+declare <16 x float> @llvm.masked.load.v16f32.p0(ptr, i32, <16 x i1>, <16 x float>)
+declare <15 x float> @llvm.masked.load.v15f32.p0(ptr, i32, <15 x i1>, <15 x float>)
+declare <14 x float> @llvm.masked.load.v14f32.p0(ptr, i32, <14 x i1>, <14 x float>)
+declare <13 x float> @llvm.masked.load.v13f32.p0(ptr, i32, <13 x i1>, <13 x float>)
+declare <12 x float> @llvm.masked.load.v12f32.p0(ptr, i32, <12 x i1>, <12 x float>)
+declare <11 x float> @llvm.masked.load.v11f32.p0(ptr, i32, <11 x i1>, <11 x float>)
+declare <10 x float> @llvm.masked.load.v10f32.p0(ptr, i32, <10 x i1>, <10 x float>)
+declare <9 x float> @llvm.masked.load.v9f32.p0(ptr, i32, <9 x i1>, <9 x float>)
+declare <8 x float> @llvm.masked.load.v8f32.p0(ptr, i32, <8 x i1>, <8 x float>)
+declare <7 x float> @llvm.masked.load.v7f32.p0(ptr, i32, <7 x i1>, <7 x float>)
+declare <6 x float> @llvm.masked.load.v6f32.p0(ptr, i32, <6 x i1>, <6 x float>)
+declare <5 x float> @llvm.masked.load.v5f32.p0(ptr, i32, <5 x i1>, <5 x float>)
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)
+declare <3 x float> @llvm.masked.load.v3f32.p0(ptr, i32, <3 x i1>, <3 x float>)
+declare <2 x float> @llvm.masked.load.v2f32.p0(ptr, i32, <2 x i1>, <2 x float>)
+declare <1 x float> @llvm.masked.load.v1f32.p0(ptr, i32, <1 x i1>, <1 x float>)
+
+declare <8 x i64> @llvm.masked.load.v8i64.p0(ptr, i32, <8 x i1>, <8 x i64>)
+declare <7 x i64> @llvm.masked.load.v7i64.p0(ptr, i32, <7 x i1>, <7 x i64>)
+declare <6 x i64> @llvm.masked.load.v6i64.p0(ptr, i32, <6 x i1>, <6 x i64>)
+declare <5 x i64> @llvm.masked.load.v5i64.p0(ptr, i32, <5 x i1>, <5 x i64>)
+declare <4 x i64> @llvm.masked.load.v4i64.p0(ptr, i32, <4 x i1>, <4 x i64>)
+declare <3 x i64> @llvm.masked.load.v3i64.p0(ptr, i32, <3 x i1>, <3 x i64>)
+declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>)
+declare <1 x i64> @llvm.masked.load.v1i64.p0(ptr, i32, <1 x i1>, <1 x i64>)
+
+declare <16 x i32> @llvm.masked.load.v16i32.p0(ptr, i32, <16 x i1>, <16 x i32>)
+declare <15 x i32> @llvm.masked.load.v15i32.p0(ptr, i32, <15 x i1>, <15 x i32>)
+declare <14 x i32> @llvm.masked.load.v14i32.p0(ptr, i32, <14 x i1>, <14 x i32>)
+declare <13 x i32> @llvm.masked.load.v13i32.p0(ptr, i32, <13 x i1>, <13 x i32>)
+declare <12 x i32> @llvm.masked.load.v12i32.p0(ptr, i32, <12 x i1>, <12 x i32>)
+declare <11 x i32> @llvm.masked.load.v11i32.p0(ptr, i32, <11 x i1>, <11 x i32>)
+declare <10 x i32> @llvm.masked.load.v10i32.p0(ptr, i32, <10 x i1>, <10 x i32>)
+declare <9 x i32> @llvm.masked.load.v9i32.p0(ptr, i32, <9 x i1>, <9 x i32>)
+declare <8 x i32> @llvm.masked.load.v8i32.p0(ptr, i32, <8 x i1>, <8 x i32>)
+declare <7 x i32> @llvm.masked.load.v7i32.p0(ptr, i32, <7 x i1>, <7 x i32>)
+declare <6 x i32> @llvm.masked.load.v6i32.p0(ptr, i32, <6 x i1>, <6 x i32>)
+declare <5 x i32> @llvm.masked.load.v5i32.p0(ptr, i32, <5 x i1>, <5 x i32>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>)
+declare <3 x i32> @llvm.masked.load.v3i32.p0(ptr, i32, <3 x i1>, <3 x i32>)
+declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>)
+declare <1 x i32> @llvm.masked.load.v1i32.p0(ptr, i32, <1 x i1>, <1 x i32>)
+
+declare <32 x i16> @llvm.masked.load.v32i16.p0(ptr, i32, <32 x i1>, <32 x i16>)
+declare <16 x i16> @llvm.masked.load.v16i16.p0(ptr, i32, <16 x i1>, <16 x i16>)
+declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>)
+declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>)
+
+declare <64 x i8> @llvm.masked.load.v64i8.p0(ptr, i32, <64 x i1>, <64 x i8>)
+declare <32 x i8> @llvm.masked.load.v32i8.p0(ptr, i32, <32 x i1>, <32 x i8>)
+declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>)
+declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>)
+
+declare void @llvm.masked.store.v8f64.p0(<8 x double>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v7f64.p0(<7 x double>, ptr, i32, <7 x i1>)
+declare void @llvm.masked.store.v6f64.p0(<6 x double>, ptr, i32, <6 x i1>)
+declare void @llvm.masked.store.v5f64.p0(<5 x double>, ptr, i32, <5 x i1>)
+declare void @llvm.masked.store.v4f64.p0(<4 x double>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v3f64.p0(<3 x double>, ptr, i32, <3 x i1>)
+declare void @llvm.masked.store.v2f64.p0(<2 x double>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v1f64.p0(<1 x double>, ptr, i32, <1 x i1>)
+
+declare void @llvm.masked.store.v16f32.p0(<16 x float>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v15f32.p0(<15 x float>, ptr, i32, <15 x i1>)
+declare void @llvm.masked.store.v14f32.p0(<14 x float>, ptr, i32, <14 x i1>)
+declare void @llvm.masked.store.v13f32.p0(<13 x float>, ptr, i32, <13 x i1>)
+declare void @llvm.masked.store.v12f32.p0(<12 x float>, ptr, i32, <12 x i1>)
+declare void @llvm.masked.store.v11f32.p0(<11 x float>, ptr, i32, <11 x i1>)
+declare void @llvm.masked.store.v10f32.p0(<10 x float>, ptr, i32, <10 x i1>)
+declare void @llvm.masked.store.v9f32.p0(<9 x float>, ptr, i32, <9 x i1>)
+declare void @llvm.masked.store.v8f32.p0(<8 x float>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr, i32, <7 x i1>)
+declare void @llvm.masked.store.v6f32.p0(<6 x float>, ptr, i32, <6 x i1>)
+declare void @llvm.masked.store.v5f32.p0(<5 x float>, ptr, i32, <5 x i1>)
+declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v3f32.p0(<3 x float>, ptr, i32, <3 x i1>)
+declare void @llvm.masked.store.v2f32.p0(<2 x float>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v1f32.p0(<1 x float>, ptr, i32, <1 x i1>)
+
+declare void @llvm.masked.store.v8i64.p0(<8 x i64>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v7i64.p0(<7 x i64>, ptr, i32, <7 x i1>)
+declare void @llvm.masked.store.v6i64.p0(<6 x i64>, ptr, i32, <6 x i1>)
+declare void @llvm.masked.store.v5i64.p0(<5 x i64>, ptr, i32, <5 x i1>)
+declare void @llvm.masked.store.v4i64.p0(<4 x i64>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v3i64.p0(<3 x i64>, ptr, i32, <3 x i1>)
+declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v1i64.p0(<1 x i64>, ptr, i32, <1 x i1>)
+
+declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v15i32.p0(<15 x i32>, ptr, i32, <15 x i1>)
+declare void @llvm.masked.store.v14i32.p0(<14 x i32>, ptr, i32, <14 x i1>)
+declare void @llvm.masked.store.v13i32.p0(<13 x i32>, ptr, i32, <13 x i1>)
+declare void @llvm.masked.store.v12i32.p0(<12 x i32>, ptr, i32, <12 x i1>)
+declare void @llvm.masked.store.v11i32.p0(<11 x i32>, ptr, i32, <11 x i1>)
+declare void @llvm.masked.store.v10i32.p0(<10 x i32>, ptr, i32, <10 x i1>)
+declare void @llvm.masked.store.v9i32.p0(<9 x i32>, ptr, i32, <9 x i1>)
+declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v7i32.p0(<7 x i32>, ptr, i32, <7 x i1>)
+declare void @llvm.masked.store.v6i32.p0(<6 x i32>, ptr, i32, <6 x i1>)
+declare void @llvm.masked.store.v5i32.p0(<5 x i32>, ptr, i32, <5 x i1>)
+declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v3i32.p0(<3 x i32>, ptr, i32, <3 x i1>)
+declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32, <1 x i1>)
+
+declare void @llvm.masked.store.v32i16.p0(<32 x i16>, ptr, i32, <32 x i1>)
+declare void @llvm.masked.store.v16i16.p0(<16 x i16>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>)
+
+declare void @llvm.masked.store.v64i8.p0(<64 x i8>, ptr, i32, <64 x i1>)
+declare void @llvm.masked.store.v32i8.p0(<32 x i8>, ptr, i32, <32 x i1>)
+declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>)
+
+declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>)
+declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>)
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>)
+declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>)
+
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>)
+declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>)
+declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)
+declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>)
+
+declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
+declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
+declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
+declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>)
+
+declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>)
+declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
+declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
+
+declare <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i16>)
+declare <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i16>)
+declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
+declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
+
+declare <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr>, i32, <64 x i1>, <64 x i8>)
+declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>)
+declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
+declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
+
+declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>)
+
+declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>)
+
+declare void @llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>)
+
+declare void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>)
+
+declare void @llvm.masked.scatter.v32i16.v32p0(<32 x i16>, <32 x ptr>, i32, <32 x i1>)
+declare void @llvm.masked.scatter.v16i16.v16p0(<16 x i16>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
+
+declare void @llvm.masked.scatter.v64i8.v64p0(<64 x i8>, <64 x ptr>, i32, <64 x i1>)
+declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>)
+declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>)
+
+declare <8 x double> @llvm.masked.expandload.v8f64(ptr, <8 x i1>, <8 x double>)
+declare <4 x double> @llvm.masked.expandload.v4f64(ptr, <4 x i1>, <4 x double>)
+declare <2 x double> @llvm.masked.expandload.v2f64(ptr, <2 x i1>, <2 x double>)
+declare <1 x double> @llvm.masked.expandload.v1f64(ptr, <1 x i1>, <1 x double>)
+
+declare <16 x float> @llvm.masked.expandload.v16f32(ptr, <16 x i1>, <16 x float>)
+declare <8 x float> @llvm.masked.expandload.v8f32(ptr, <8 x i1>, <8 x float>)
+declare <4 x float> @llvm.masked.expandload.v4f32(ptr, <4 x i1>, <4 x float>)
+declare <2 x float> @llvm.masked.expandload.v2f32(ptr, <2 x i1>, <2 x float>)
+
+declare <8 x i64> @llvm.masked.expandload.v8i64(ptr, <8 x i1>, <8 x i64>)
+declare <4 x i64> @llvm.masked.expandload.v4i64(ptr, <4 x i1>, <4 x i64>)
+declare <2 x i64> @llvm.masked.expandload.v2i64(ptr, <2 x i1>, <2 x i64>)
+declare <1 x i64> @llvm.masked.expandload.v1i64(ptr, <1 x i1>, <1 x i64>)
+
+declare <16 x i32> @llvm.masked.expandload.v16i32(ptr, <16 x i1>, <16 x i32>)
+declare <8 x i32> @llvm.masked.expandload.v8i32(ptr, <8 x i1>, <8 x i32>)
+declare <4 x i32> @llvm.masked.expandload.v4i32(ptr, <4 x i1>, <4 x i32>)
+declare <2 x i32> @llvm.masked.expandload.v2i32(ptr, <2 x i1>, <2 x i32>)
+
+declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>)
+declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>)
+declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
+declare <4 x i16> @llvm.masked.expandload.v4i16(ptr, <4 x i1>, <4 x i16>)
+
+declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>)
+declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>)
+declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>)
+declare <8 x i8> @llvm.masked.expandload.v8i8(ptr, <8 x i1>, <8 x i8>)
+
+declare void @llvm.masked.compressstore.v8f64(<8 x double>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4f64(<4 x double>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2f64(<2 x double>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v1f64(<1 x double>, ptr, <1 x i1>)
+
+declare void @llvm.masked.compressstore.v16f32(<16 x float>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8f32(<8 x float>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4f32(<4 x float>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2f32(<2 x float>, ptr, <2 x i1>)
+
+declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2i64(<2 x i64>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v1i64(<1 x i64>, ptr, <1 x i1>)
+
+declare void @llvm.masked.compressstore.v16i32(<16 x i32>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2i32(<2 x i32>, ptr, <2 x i1>)
+
+declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4i16(<4 x i16>, ptr, <4 x i1>)
+
+declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>)
+declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8i8(<8 x i8>, ptr, <8 x i1>)
@A = global [1024 x i8] zeroinitializer, align 128
@C = global [1024 x i16] zeroinitializer, align 128
-define void @test([1024 x i16]* %B) {
+define void @test(ptr %B) {
; SSE-LABEL: 'test'
-; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; SSE: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; SSE: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; SSE: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; SSE: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i16, i16* %inB, align 2
+; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; SSE: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; SSE: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; SSE: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; SSE: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i16, ptr %inB, align 2
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX1: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX1: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX1: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX1: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX1: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i16, i16* %inB, align 2
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX1: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX1: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX1: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX1: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX1: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i16, ptr %inB, align 2
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX2: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX2: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX2: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX2: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX2: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i16, i16* %inB, align 2
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX2: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX2: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX2: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX2: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX2: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i16, ptr %inB, align 2
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX512: LV: Found an estimated cost of 1 for VF 32 For instruction: %valB.loaded = load i16, i16* %inB, align 2
-; AVX512: LV: Found an estimated cost of 2 for VF 64 For instruction: %valB.loaded = load i16, i16* %inB, align 2
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX512: LV: Found an estimated cost of 1 for VF 32 For instruction: %valB.loaded = load i16, ptr %inB, align 2
+; AVX512: LV: Found an estimated cost of 2 for VF 64 For instruction: %valB.loaded = load i16, ptr %inB, align 2
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%canLoad = icmp ne i8 %valA, 0
br i1 %canLoad, label %load, label %mask
load:
- %inB = getelementptr inbounds [1024 x i16], [1024 x i16]* %B, i64 0, i64 %iv
- %valB.loaded = load i16, i16* %inB
+ %inB = getelementptr inbounds [1024 x i16], ptr %B, i64 0, i64 %iv
+ %valB.loaded = load i16, ptr %inB
br label %end
mask:
end:
%valB = phi i16 [ %valB.loaded, %load ], [ 0, %mask ]
- %out = getelementptr inbounds [1024 x i16], [1024 x i16]* @C, i64 0, i64 %iv
- store i16 %valB, i16* %out
+ %out = getelementptr inbounds [1024 x i16], ptr @C, i64 0, i64 %iv
+ store i16 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
@A = global [1024 x i8] zeroinitializer, align 128
@C = global [1024 x i32] zeroinitializer, align 128
-define void @test([1024 x i32]* %B) {
+define void @test(ptr %B) {
; SSE-LABEL: 'test'
-; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; SSE: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; SSE: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; SSE: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; SSE: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i32, i32* %inB, align 4
+; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; SSE: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; SSE: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; SSE: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; SSE: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i32, ptr %inB, align 4
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 2 for VF 8 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 4 for VF 16 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX1: LV: Found an estimated cost of 8 for VF 32 For instruction: %valB.loaded = load i32, i32* %inB, align 4
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 2 for VF 8 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 4 for VF 16 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX1: LV: Found an estimated cost of 8 for VF 32 For instruction: %valB.loaded = load i32, ptr %inB, align 4
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2: LV: Found an estimated cost of 2 for VF 4 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2: LV: Found an estimated cost of 2 for VF 8 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2: LV: Found an estimated cost of 4 for VF 16 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX2: LV: Found an estimated cost of 8 for VF 32 For instruction: %valB.loaded = load i32, i32* %inB, align 4
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2: LV: Found an estimated cost of 2 for VF 4 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2: LV: Found an estimated cost of 2 for VF 8 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 16 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 32 For instruction: %valB.loaded = load i32, ptr %inB, align 4
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 2 for VF 32 For instruction: %valB.loaded = load i32, i32* %inB, align 4
-; AVX512: LV: Found an estimated cost of 4 for VF 64 For instruction: %valB.loaded = load i32, i32* %inB, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 2 for VF 32 For instruction: %valB.loaded = load i32, ptr %inB, align 4
+; AVX512: LV: Found an estimated cost of 4 for VF 64 For instruction: %valB.loaded = load i32, ptr %inB, align 4
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%canLoad = icmp ne i8 %valA, 0
br i1 %canLoad, label %load, label %mask
load:
- %inB = getelementptr inbounds [1024 x i32], [1024 x i32]* %B, i64 0, i64 %iv
- %valB.loaded = load i32, i32* %inB
+ %inB = getelementptr inbounds [1024 x i32], ptr %B, i64 0, i64 %iv
+ %valB.loaded = load i32, ptr %inB
br label %end
mask:
end:
%valB = phi i32 [ %valB.loaded, %load ], [ 0, %mask ]
- %out = getelementptr inbounds [1024 x i32], [1024 x i32]* @C, i64 0, i64 %iv
- store i32 %valB, i32* %out
+ %out = getelementptr inbounds [1024 x i32], ptr @C, i64 0, i64 %iv
+ store i32 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
@A = global [1024 x i8] zeroinitializer, align 128
@C = global [1024 x i64] zeroinitializer, align 128
-define void @test([1024 x i64]* %B) {
+define void @test(ptr %B) {
; SSE-LABEL: 'test'
-; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; SSE: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; SSE: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; SSE: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; SSE: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
+; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; SSE: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; SSE: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; SSE: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; SSE: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i64, ptr %inB, align 8
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 4 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 8 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX1: LV: Found an estimated cost of 16 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 4 for VF 8 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 8 for VF 16 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX1: LV: Found an estimated cost of 16 for VF 32 For instruction: %valB.loaded = load i64, ptr %inB, align 8
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2: LV: Found an estimated cost of 2 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2: LV: Found an estimated cost of 4 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2: LV: Found an estimated cost of 8 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX2: LV: Found an estimated cost of 16 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2: LV: Found an estimated cost of 2 for VF 4 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 8 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2: LV: Found an estimated cost of 8 for VF 16 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX2: LV: Found an estimated cost of 16 for VF 32 For instruction: %valB.loaded = load i64, ptr %inB, align 8
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 2 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 4 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8
-; AVX512: LV: Found an estimated cost of 8 for VF 64 For instruction: %valB.loaded = load i64, i64* %inB, align 8
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 2 for VF 16 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 4 for VF 32 For instruction: %valB.loaded = load i64, ptr %inB, align 8
+; AVX512: LV: Found an estimated cost of 8 for VF 64 For instruction: %valB.loaded = load i64, ptr %inB, align 8
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%canLoad = icmp ne i8 %valA, 0
br i1 %canLoad, label %load, label %mask
load:
- %inB = getelementptr inbounds [1024 x i64], [1024 x i64]* %B, i64 0, i64 %iv
- %valB.loaded = load i64, i64* %inB
+ %inB = getelementptr inbounds [1024 x i64], ptr %B, i64 0, i64 %iv
+ %valB.loaded = load i64, ptr %inB
br label %end
mask:
end:
%valB = phi i64 [ %valB.loaded, %load ], [ 0, %mask ]
- %out = getelementptr inbounds [1024 x i64], [1024 x i64]* @C, i64 0, i64 %iv
- store i64 %valB, i64* %out
+ %out = getelementptr inbounds [1024 x i64], ptr @C, i64 0, i64 %iv
+ store i64 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
@A = global [1024 x i8] zeroinitializer, align 128
@C = global [1024 x i8] zeroinitializer, align 128
-define void @test([1024 x i8]* %B) {
+define void @test(ptr %B) {
; SSE-LABEL: 'test'
-; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; SSE: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; SSE: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; SSE: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; SSE: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i8, i8* %inB, align 1
+; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; SSE: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; SSE: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; SSE: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; SSE: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i8, ptr %inB, align 1
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX1: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX1: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX1: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX1: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX1: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i8, i8* %inB, align 1
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX1: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX1: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX1: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX1: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX1: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i8, ptr %inB, align 1
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX2: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX2: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX2: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX2: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX2: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i8, i8* %inB, align 1
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX2: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX2: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX2: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX2: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX2: LV: Found an estimated cost of 3000000 for VF 32 For instruction: %valB.loaded = load i8, ptr %inB, align 1
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX512: LV: Found an estimated cost of 1 for VF 32 For instruction: %valB.loaded = load i8, i8* %inB, align 1
-; AVX512: LV: Found an estimated cost of 1 for VF 64 For instruction: %valB.loaded = load i8, i8* %inB, align 1
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX512: LV: Found an estimated cost of 1 for VF 32 For instruction: %valB.loaded = load i8, ptr %inB, align 1
+; AVX512: LV: Found an estimated cost of 1 for VF 64 For instruction: %valB.loaded = load i8, ptr %inB, align 1
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%canLoad = icmp ne i8 %valA, 0
br i1 %canLoad, label %load, label %mask
load:
- %inB = getelementptr inbounds [1024 x i8], [1024 x i8]* %B, i64 0, i64 %iv
- %valB.loaded = load i8, i8* %inB
+ %inB = getelementptr inbounds [1024 x i8], ptr %B, i64 0, i64 %iv
+ %valB.loaded = load i8, ptr %inB
br label %end
mask:
end:
%valB = phi i8 [ %valB.loaded, %load ], [ 0, %mask ]
- %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @C, i64 0, i64 %iv
- store i8 %valB, i8* %out
+ %out = getelementptr inbounds [1024 x i8], ptr @C, i64 0, i64 %iv
+ store i8 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; SSE2: LV: Found an estimated cost of 11 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; SSE2: LV: Found an estimated cost of 22 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2: LV: Found an estimated cost of 11 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2: LV: Found an estimated cost of 22 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
;
; SSE42-LABEL: 'test'
-; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
+; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 17 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 34 for VF 32 For instruction: store i32 %valB, i32* %out, align 4
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 17 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 34 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 17 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 34 for VF 32 For instruction: store i32 %valB, i32* %out, align 4
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 10 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 72 for VF 64 For instruction: store i32 %valB, i32* %out, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 10 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 72 for VF 64 For instruction: store i32 %valB, ptr %out, align 4
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
- %inB = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %iv
- %valB = load i32, i32* %inB
+ %inB = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %iv
+ %valB = load i32, ptr %inB
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%canStore = icmp ne i8 %valA, 0
br i1 %canStore, label %store, label %mask
store:
%valA.ext = sext i8 %valA to i64
- %out = getelementptr inbounds [1024 x i32], [1024 x i32]* @C, i64 0, i64 %valA.ext
- store i32 %valB, i32* %out
+ %out = getelementptr inbounds [1024 x i32], ptr @C, i64 0, i64 %valA.ext
+ store i32 %valB, ptr %out
br label %end
mask:
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; SSE2: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; SSE2: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
;
; SSE42-LABEL: 'test'
-; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
+; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 9 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 18 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 36 for VF 32 For instruction: store i64 %valB, i64* %out, align 8
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 9 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 18 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 36 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 9 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 18 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 36 for VF 32 For instruction: store i64 %valB, i64* %out, align 8
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 36 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: store i64 %valB, i64* %out, align 8
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: store i64 %valB, ptr %out, align 8
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
- %inB = getelementptr inbounds [1024 x i64], [1024 x i64]* @B, i64 0, i64 %iv
- %valB = load i64, i64* %inB
+ %inB = getelementptr inbounds [1024 x i64], ptr @B, i64 0, i64 %iv
+ %valB = load i64, ptr %inB
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%canStore = icmp ne i8 %valA, 0
br i1 %canStore, label %store, label %mask
store:
%valA.ext = sext i8 %valA to i64
- %out = getelementptr inbounds [1024 x i64], [1024 x i64]* @C, i64 0, i64 %valA.ext
- store i64 %valB, i64* %out
+ %out = getelementptr inbounds [1024 x i64], ptr @C, i64 0, i64 %valA.ext
+ store i64 %valB, ptr %out
br label %end
mask:
@A = global [1024 x i8] zeroinitializer, align 128
@B = global [1024 x i16] zeroinitializer, align 128
-define void @test([1024 x i16]* %C) {
+define void @test(ptr %C) {
; SSE-LABEL: 'test'
-; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2
-; SSE: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, i16* %out, align 2
-; SSE: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, i16* %out, align 2
-; SSE: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, i16* %out, align 2
-; SSE: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, i16* %out, align 2
+; SSE: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
+; SSE: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
+; SSE: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
+; SSE: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
+; SSE: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2
-; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, i16* %out, align 2
-; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, i16* %out, align 2
-; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, i16* %out, align 2
-; AVX1: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, i16* %out, align 2
-; AVX1: LV: Found an estimated cost of 33 for VF 32 For instruction: store i16 %valB, i16* %out, align 2
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
+; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
+; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
+; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
+; AVX1: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
+; AVX1: LV: Found an estimated cost of 33 for VF 32 For instruction: store i16 %valB, ptr %out, align 2
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2
-; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, i16* %out, align 2
-; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, i16* %out, align 2
-; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, i16* %out, align 2
-; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, i16* %out, align 2
-; AVX2: LV: Found an estimated cost of 33 for VF 32 For instruction: store i16 %valB, i16* %out, align 2
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
+; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 32 For instruction: store i16 %valB, ptr %out, align 2
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2
-; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, i16* %out, align 2
-; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %valB, i16* %out, align 2
-; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: store i16 %valB, i16* %out, align 2
-; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: store i16 %valB, i16* %out, align 2
-; AVX512: LV: Found an estimated cost of 1 for VF 32 For instruction: store i16 %valB, i16* %out, align 2
-; AVX512: LV: Found an estimated cost of 2 for VF 64 For instruction: store i16 %valB, i16* %out, align 2
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512: LV: Found an estimated cost of 1 for VF 32 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512: LV: Found an estimated cost of 2 for VF 64 For instruction: store i16 %valB, ptr %out, align 2
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
- %inB = getelementptr inbounds [1024 x i16], [1024 x i16]* @B, i64 0, i64 %iv
- %valB = load i16, i16* %inB
+ %inB = getelementptr inbounds [1024 x i16], ptr @B, i64 0, i64 %iv
+ %valB = load i16, ptr %inB
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%canStore = icmp ne i8 %valA, 0
br i1 %canStore, label %store, label %mask
store:
- %out = getelementptr inbounds [1024 x i16], [1024 x i16]* %C, i64 0, i64 %iv
- store i16 %valB, i16* %out
+ %out = getelementptr inbounds [1024 x i16], ptr %C, i64 0, i64 %iv
+ store i16 %valB, ptr %out
br label %end
mask:
@A = global [1024 x i8] zeroinitializer, align 128
@B = global [1024 x i32] zeroinitializer, align 128
-define void @test([1024 x i32]* %C) {
+define void @test(ptr %C) {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; SSE2: LV: Found an estimated cost of 11 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; SSE2: LV: Found an estimated cost of 22 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2: LV: Found an estimated cost of 11 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2: LV: Found an estimated cost of 22 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
;
; SSE42-LABEL: 'test'
-; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
+; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 9 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 32 for VF 32 For instruction: store i32 %valB, i32* %out, align 4
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 9 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 32 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 9 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 32 for VF 32 For instruction: store i32 %valB, i32* %out, align 4
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 9 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 32 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 2 for VF 32 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 4 for VF 64 For instruction: store i32 %valB, i32* %out, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 2 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 4 for VF 64 For instruction: store i32 %valB, ptr %out, align 4
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
- %inB = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %iv
- %valB = load i32, i32* %inB
+ %inB = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %iv
+ %valB = load i32, ptr %inB
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%canStore = icmp ne i8 %valA, 0
br i1 %canStore, label %store, label %mask
store:
- %out = getelementptr inbounds [1024 x i32], [1024 x i32]* %C, i64 0, i64 %iv
- store i32 %valB, i32* %out
+ %out = getelementptr inbounds [1024 x i32], ptr %C, i64 0, i64 %iv
+ store i32 %valB, ptr %out
br label %end
mask:
@A = global [1024 x i8] zeroinitializer, align 128
@B = global [1024 x i64] zeroinitializer, align 128
-define void @test([1024 x i64]* %C) {
+define void @test(ptr %C) {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; SSE2: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; SSE2: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
;
; SSE42-LABEL: 'test'
-; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
+; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 8 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 64 for VF 32 For instruction: store i64 %valB, i64* %out, align 8
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 8 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 64 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 64 for VF 32 For instruction: store i64 %valB, i64* %out, align 8
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 64 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 2 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 4 for VF 32 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 8 for VF 64 For instruction: store i64 %valB, i64* %out, align 8
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 2 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 4 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 8 for VF 64 For instruction: store i64 %valB, ptr %out, align 8
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
- %inB = getelementptr inbounds [1024 x i64], [1024 x i64]* @B, i64 0, i64 %iv
- %valB = load i64, i64* %inB
+ %inB = getelementptr inbounds [1024 x i64], ptr @B, i64 0, i64 %iv
+ %valB = load i64, ptr %inB
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%canStore = icmp ne i8 %valA, 0
br i1 %canStore, label %store, label %mask
store:
- %out = getelementptr inbounds [1024 x i64], [1024 x i64]* %C, i64 0, i64 %iv
- store i64 %valB, i64* %out
+ %out = getelementptr inbounds [1024 x i64], ptr %C, i64 0, i64 %iv
+ store i64 %valB, ptr %out
br label %end
mask:
@A = global [1024 x i8] zeroinitializer, align 128
@B = global [1024 x i8] zeroinitializer, align 128
-define void @test([1024 x i8]* %C) {
+define void @test(ptr %C) {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1
-; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, i8* %out, align 1
-; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i8 %valB, i8* %out, align 1
-; SSE2: LV: Found an estimated cost of 11 for VF 8 For instruction: store i8 %valB, i8* %out, align 1
-; SSE2: LV: Found an estimated cost of 23 for VF 16 For instruction: store i8 %valB, i8* %out, align 1
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
+; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
+; SSE2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
+; SSE2: LV: Found an estimated cost of 11 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
+; SSE2: LV: Found an estimated cost of 23 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
;
; SSE42-LABEL: 'test'
-; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1
-; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, i8* %out, align 1
-; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, i8* %out, align 1
-; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, i8* %out, align 1
-; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, i8* %out, align 1
+; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
+; SSE42: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
+; SSE42: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
+; SSE42: LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
+; SSE42: LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1
-; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, i8* %out, align 1
-; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, i8* %out, align 1
-; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, i8* %out, align 1
-; AVX1: LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, i8* %out, align 1
-; AVX1: LV: Found an estimated cost of 32 for VF 32 For instruction: store i8 %valB, i8* %out, align 1
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
+; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
+; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
+; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
+; AVX1: LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
+; AVX1: LV: Found an estimated cost of 32 for VF 32 For instruction: store i8 %valB, ptr %out, align 1
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1
-; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, i8* %out, align 1
-; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, i8* %out, align 1
-; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, i8* %out, align 1
-; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, i8* %out, align 1
-; AVX2: LV: Found an estimated cost of 32 for VF 32 For instruction: store i8 %valB, i8* %out, align 1
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
+; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 32 For instruction: store i8 %valB, ptr %out, align 1
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1
-; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, i8* %out, align 1
-; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %valB, i8* %out, align 1
-; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %valB, i8* %out, align 1
-; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: store i8 %valB, i8* %out, align 1
-; AVX512: LV: Found an estimated cost of 1 for VF 32 For instruction: store i8 %valB, i8* %out, align 1
-; AVX512: LV: Found an estimated cost of 1 for VF 64 For instruction: store i8 %valB, i8* %out, align 1
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512: LV: Found an estimated cost of 1 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512: LV: Found an estimated cost of 1 for VF 32 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512: LV: Found an estimated cost of 1 for VF 64 For instruction: store i8 %valB, ptr %out, align 1
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %end ]
- %inB = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv
- %valB = load i8, i8* %inB
+ %inB = getelementptr inbounds [1024 x i8], ptr @B, i64 0, i64 %iv
+ %valB = load i8, ptr %inB
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%canStore = icmp ne i8 %valA, 0
br i1 %canStore, label %store, label %mask
store:
- %out = getelementptr inbounds [1024 x i8], [1024 x i8]* %C, i64 0, i64 %iv
- store i8 %valB, i8* %out
+ %out = getelementptr inbounds [1024 x i8], ptr %C, i64 0, i64 %iv
+ store i8 %valB, ptr %out
br label %end
mask:
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2
-; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i16 %valB, i16* %out, align 2
-; SSE2: LV: Found an estimated cost of 56 for VF 4 For instruction: store i16 %valB, i16* %out, align 2
-; SSE2: LV: Found an estimated cost of 112 for VF 8 For instruction: store i16 %valB, i16* %out, align 2
-; SSE2: LV: Found an estimated cost of 224 for VF 16 For instruction: store i16 %valB, i16* %out, align 2
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
+; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
+; SSE2: LV: Found an estimated cost of 56 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
+; SSE2: LV: Found an estimated cost of 112 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
+; SSE2: LV: Found an estimated cost of 224 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
;
; SSE42-LABEL: 'test'
-; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2
-; SSE42: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %valB, i16* %out, align 2
-; SSE42: LV: Found an estimated cost of 52 for VF 4 For instruction: store i16 %valB, i16* %out, align 2
-; SSE42: LV: Found an estimated cost of 104 for VF 8 For instruction: store i16 %valB, i16* %out, align 2
-; SSE42: LV: Found an estimated cost of 208 for VF 16 For instruction: store i16 %valB, i16* %out, align 2
+; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
+; SSE42: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
+; SSE42: LV: Found an estimated cost of 52 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
+; SSE42: LV: Found an estimated cost of 104 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
+; SSE42: LV: Found an estimated cost of 208 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2
-; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %valB, i16* %out, align 2
-; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i16 %valB, i16* %out, align 2
-; AVX1: LV: Found an estimated cost of 106 for VF 8 For instruction: store i16 %valB, i16* %out, align 2
-; AVX1: LV: Found an estimated cost of 213 for VF 16 For instruction: store i16 %valB, i16* %out, align 2
-; AVX1: LV: Found an estimated cost of 426 for VF 32 For instruction: store i16 %valB, i16* %out, align 2
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
+; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
+; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
+; AVX1: LV: Found an estimated cost of 106 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
+; AVX1: LV: Found an estimated cost of 213 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
+; AVX1: LV: Found an estimated cost of 426 for VF 32 For instruction: store i16 %valB, ptr %out, align 2
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2
-; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %valB, i16* %out, align 2
-; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %valB, i16* %out, align 2
-; AVX2: LV: Found an estimated cost of 26 for VF 8 For instruction: store i16 %valB, i16* %out, align 2
-; AVX2: LV: Found an estimated cost of 53 for VF 16 For instruction: store i16 %valB, i16* %out, align 2
-; AVX2: LV: Found an estimated cost of 106 for VF 32 For instruction: store i16 %valB, i16* %out, align 2
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
+; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
+; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
+; AVX2: LV: Found an estimated cost of 26 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
+; AVX2: LV: Found an estimated cost of 53 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
+; AVX2: LV: Found an estimated cost of 106 for VF 32 For instruction: store i16 %valB, ptr %out, align 2
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2
-; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %valB, i16* %out, align 2
-; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %valB, i16* %out, align 2
-; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %valB, i16* %out, align 2
-; AVX512: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %valB, i16* %out, align 2
-; AVX512: LV: Found an estimated cost of 111 for VF 32 For instruction: store i16 %valB, i16* %out, align 2
-; AVX512: LV: Found an estimated cost of 222 for VF 64 For instruction: store i16 %valB, i16* %out, align 2
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512: LV: Found an estimated cost of 111 for VF 32 For instruction: store i16 %valB, ptr %out, align 2
+; AVX512: LV: Found an estimated cost of 222 for VF 64 For instruction: store i16 %valB, ptr %out, align 2
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%valA.ext = sext i8 %valA to i64
- %inB = getelementptr inbounds [1024 x i16], [1024 x i16]* @B, i64 0, i64 %iv
- %valB = load i16, i16* %inB
+ %inB = getelementptr inbounds [1024 x i16], ptr @B, i64 0, i64 %iv
+ %valB = load i16, ptr %inB
- %out = getelementptr inbounds [1024 x i16], [1024 x i16]* @C, i64 0, i64 %valA.ext
- store i16 %valB, i16* %out
+ %out = getelementptr inbounds [1024 x i16], ptr @C, i64 0, i64 %valA.ext
+ store i16 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; SSE2: LV: Found an estimated cost of 118 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; SSE2: LV: Found an estimated cost of 236 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2: LV: Found an estimated cost of 118 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; SSE2: LV: Found an estimated cost of 236 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
;
; SSE42-LABEL: 'test'
-; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; SSE42: LV: Found an estimated cost of 26 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; SSE42: LV: Found an estimated cost of 52 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; SSE42: LV: Found an estimated cost of 104 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; SSE42: LV: Found an estimated cost of 208 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
+; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42: LV: Found an estimated cost of 26 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42: LV: Found an estimated cost of 52 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42: LV: Found an estimated cost of 104 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; SSE42: LV: Found an estimated cost of 208 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 107 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 214 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
-; AVX1: LV: Found an estimated cost of 428 for VF 32 For instruction: store i32 %valB, i32* %out, align 4
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 107 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 214 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; AVX1: LV: Found an estimated cost of 428 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 27 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 54 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
-; AVX2: LV: Found an estimated cost of 108 for VF 32 For instruction: store i32 %valB, i32* %out, align 4
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 27 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 54 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; AVX2: LV: Found an estimated cost of 108 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: store i32 %valB, i32* %out, align 4
-; AVX512: LV: Found an estimated cost of 72 for VF 64 For instruction: store i32 %valB, i32* %out, align 4
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: store i32 %valB, ptr %out, align 4
+; AVX512: LV: Found an estimated cost of 72 for VF 64 For instruction: store i32 %valB, ptr %out, align 4
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%valA.ext = sext i8 %valA to i64
- %inB = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %iv
- %valB = load i32, i32* %inB
+ %inB = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %iv
+ %valB = load i32, ptr %inB
- %out = getelementptr inbounds [1024 x i32], [1024 x i32]* @C, i64 0, i64 %valA.ext
- store i32 %valB, i32* %out
+ %out = getelementptr inbounds [1024 x i32], ptr @C, i64 0, i64 %valA.ext
+ store i32 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; SSE2: LV: Found an estimated cost of 58 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; SSE2: LV: Found an estimated cost of 116 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; SSE2: LV: Found an estimated cost of 232 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2: LV: Found an estimated cost of 58 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2: LV: Found an estimated cost of 116 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; SSE2: LV: Found an estimated cost of 232 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
;
; SSE42-LABEL: 'test'
-; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; SSE42: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; SSE42: LV: Found an estimated cost of 52 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; SSE42: LV: Found an estimated cost of 104 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; SSE42: LV: Found an estimated cost of 208 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
+; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42: LV: Found an estimated cost of 52 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42: LV: Found an estimated cost of 104 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; SSE42: LV: Found an estimated cost of 208 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 108 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
-; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: store i64 %valB, i64* %out, align 8
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 108 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 14 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
-; AVX2: LV: Found an estimated cost of 112 for VF 32 For instruction: store i64 %valB, i64* %out, align 8
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; AVX2: LV: Found an estimated cost of 112 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, i64* %out, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: store i64 %valB, i64* %out, align 8
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, ptr %out, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: store i64 %valB, ptr %out, align 8
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%valA.ext = sext i8 %valA to i64
- %inB = getelementptr inbounds [1024 x i64], [1024 x i64]* @B, i64 0, i64 %iv
- %valB = load i64, i64* %inB
+ %inB = getelementptr inbounds [1024 x i64], ptr @B, i64 0, i64 %iv
+ %valB = load i64, ptr %inB
- %out = getelementptr inbounds [1024 x i64], [1024 x i64]* @C, i64 0, i64 %valA.ext
- store i64 %valB, i64* %out
+ %out = getelementptr inbounds [1024 x i64], ptr @C, i64 0, i64 %valA.ext
+ store i64 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
define void @test() {
; SSE2-LABEL: 'test'
-; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1
-; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: store i8 %valB, i8* %out, align 1
-; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction: store i8 %valB, i8* %out, align 1
-; SSE2: LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %valB, i8* %out, align 1
-; SSE2: LV: Found an estimated cost of 239 for VF 16 For instruction: store i8 %valB, i8* %out, align 1
+; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
+; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
+; SSE2: LV: Found an estimated cost of 59 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
+; SSE2: LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
+; SSE2: LV: Found an estimated cost of 239 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
;
; SSE42-LABEL: 'test'
-; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1
-; SSE42: LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %valB, i8* %out, align 1
-; SSE42: LV: Found an estimated cost of 52 for VF 4 For instruction: store i8 %valB, i8* %out, align 1
-; SSE42: LV: Found an estimated cost of 104 for VF 8 For instruction: store i8 %valB, i8* %out, align 1
-; SSE42: LV: Found an estimated cost of 208 for VF 16 For instruction: store i8 %valB, i8* %out, align 1
+; SSE42: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
+; SSE42: LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
+; SSE42: LV: Found an estimated cost of 52 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
+; SSE42: LV: Found an estimated cost of 104 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
+; SSE42: LV: Found an estimated cost of 208 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
;
; AVX1-LABEL: 'test'
-; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1
-; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %valB, i8* %out, align 1
-; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %valB, i8* %out, align 1
-; AVX1: LV: Found an estimated cost of 106 for VF 8 For instruction: store i8 %valB, i8* %out, align 1
-; AVX1: LV: Found an estimated cost of 212 for VF 16 For instruction: store i8 %valB, i8* %out, align 1
-; AVX1: LV: Found an estimated cost of 425 for VF 32 For instruction: store i8 %valB, i8* %out, align 1
+; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
+; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
+; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
+; AVX1: LV: Found an estimated cost of 106 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
+; AVX1: LV: Found an estimated cost of 212 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
+; AVX1: LV: Found an estimated cost of 425 for VF 32 For instruction: store i8 %valB, ptr %out, align 1
;
; AVX2-LABEL: 'test'
-; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1
-; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %valB, i8* %out, align 1
-; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i8 %valB, i8* %out, align 1
-; AVX2: LV: Found an estimated cost of 26 for VF 8 For instruction: store i8 %valB, i8* %out, align 1
-; AVX2: LV: Found an estimated cost of 52 for VF 16 For instruction: store i8 %valB, i8* %out, align 1
-; AVX2: LV: Found an estimated cost of 105 for VF 32 For instruction: store i8 %valB, i8* %out, align 1
+; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
+; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
+; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
+; AVX2: LV: Found an estimated cost of 26 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
+; AVX2: LV: Found an estimated cost of 52 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
+; AVX2: LV: Found an estimated cost of 105 for VF 32 For instruction: store i8 %valB, ptr %out, align 1
;
; AVX512-LABEL: 'test'
-; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1
-; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %valB, i8* %out, align 1
-; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i8 %valB, i8* %out, align 1
-; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: store i8 %valB, i8* %out, align 1
-; AVX512: LV: Found an estimated cost of 54 for VF 16 For instruction: store i8 %valB, i8* %out, align 1
-; AVX512: LV: Found an estimated cost of 109 for VF 32 For instruction: store i8 %valB, i8* %out, align 1
-; AVX512: LV: Found an estimated cost of 219 for VF 64 For instruction: store i8 %valB, i8* %out, align 1
+; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512: LV: Found an estimated cost of 54 for VF 16 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512: LV: Found an estimated cost of 109 for VF 32 For instruction: store i8 %valB, ptr %out, align 1
+; AVX512: LV: Found an estimated cost of 219 for VF 64 For instruction: store i8 %valB, ptr %out, align 1
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %inA = getelementptr inbounds [1024 x i8], [1024 x i8]* @A, i64 0, i64 %iv
- %valA = load i8, i8* %inA
+ %inA = getelementptr inbounds [1024 x i8], ptr @A, i64 0, i64 %iv
+ %valA = load i8, ptr %inA
%valA.ext = sext i8 %valA to i64
- %inB = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv
- %valB = load i8, i8* %inB
+ %inB = getelementptr inbounds [1024 x i8], ptr @B, i64 0, i64 %iv
+ %valB = load i8, ptr %inB
- %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @C, i64 0, i64 %valA.ext
- store i8 %valB, i8* %out
+ %out = getelementptr inbounds [1024 x i8], ptr @C, i64 0, i64 %valA.ext
+ store i8 %valB, ptr %out
%iv.next = add nuw nsw i64 %iv, 1
%cmp = icmp ult i64 %iv.next, 1024
ret double %r
}
-define i64* @inttoptr_i64_p64(i64 %x) {
+define ptr @inttoptr_i64_p64(i64 %x) {
; CHECK-LABEL: 'inttoptr_i64_p64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = inttoptr i64 %x to i64*
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64* %r
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = inttoptr i64 %x to ptr
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret ptr %r
;
- %r = inttoptr i64 %x to i64*
- ret i64* %r
+ %r = inttoptr i64 %x to ptr
+ ret ptr %r
}
-define i64 @ptrtoint_p64_i64(i64* %x) {
+define i64 @ptrtoint_p64_i64(ptr %x) {
; CHECK-LABEL: 'ptrtoint_p64_i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = ptrtoint i64* %x to i64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = ptrtoint ptr %x to i64
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
;
- %r = ptrtoint i64* %x to i64
+ %r = ptrtoint ptr %x to i64
ret i64 %r
}
; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE41
-define void @zext_v4i8_to_v4i64(<4 x i8>* %a) {
+define void @zext_v4i8_to_v4i64(ptr %a) {
; SSE2-LABEL: 'zext_v4i8_to_v4i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, ptr %a, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = zext <4 x i8> %1 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'zext_v4i8_to_v4i64'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, ptr %a, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <4 x i8> %1 to <4 x i64>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i8>, <4 x i8>* %a
+ %1 = load <4 x i8>, ptr %a
%2 = zext <4 x i8> %1 to <4 x i64>
- store <4 x i64> %2, <4 x i64>* undef, align 4
+ store <4 x i64> %2, ptr undef, align 4
ret void
}
-define void @sext_v4i8_to_v4i64(<4 x i8>* %a) {
+define void @sext_v4i8_to_v4i64(ptr %a) {
; SSE2-LABEL: 'sext_v4i8_to_v4i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, ptr %a, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = sext <4 x i8> %1 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'sext_v4i8_to_v4i64'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, ptr %a, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <4 x i8> %1 to <4 x i64>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i8>, <4 x i8>* %a
+ %1 = load <4 x i8>, ptr %a
%2 = sext <4 x i8> %1 to <4 x i64>
- store <4 x i64> %2, <4 x i64>* undef, align 4
+ store <4 x i64> %2, ptr undef, align 4
ret void
}
-define void @zext_v4i16_to_v4i64(<4 x i16>* %a) {
+define void @zext_v4i16_to_v4i64(ptr %a) {
; SSE2-LABEL: 'zext_v4i16_to_v4i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a, align 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, ptr %a, align 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = zext <4 x i16> %1 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'zext_v4i16_to_v4i64'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a, align 8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, ptr %a, align 8
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <4 x i16> %1 to <4 x i64>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i16>, <4 x i16>* %a
+ %1 = load <4 x i16>, ptr %a
%2 = zext <4 x i16> %1 to <4 x i64>
- store <4 x i64> %2, <4 x i64>* undef, align 4
+ store <4 x i64> %2, ptr undef, align 4
ret void
}
-define void @sext_v4i16_to_v4i64(<4 x i16>* %a) {
+define void @sext_v4i16_to_v4i64(ptr %a) {
; SSE2-LABEL: 'sext_v4i16_to_v4i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a, align 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, ptr %a, align 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = sext <4 x i16> %1 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'sext_v4i16_to_v4i64'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a, align 8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, ptr %a, align 8
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <4 x i16> %1 to <4 x i64>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i16>, <4 x i16>* %a
+ %1 = load <4 x i16>, ptr %a
%2 = sext <4 x i16> %1 to <4 x i64>
- store <4 x i64> %2, <4 x i64>* undef, align 4
+ store <4 x i64> %2, ptr undef, align 4
ret void
}
-define void @zext_v4i32_to_v4i64(<4 x i32>* %a) {
+define void @zext_v4i32_to_v4i64(ptr %a) {
; CHECK-LABEL: 'zext_v4i32_to_v4i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, ptr %a, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <4 x i32> %1 to <4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i32>, <4 x i32>* %a
+ %1 = load <4 x i32>, ptr %a
%2 = zext <4 x i32> %1 to <4 x i64>
- store <4 x i64> %2, <4 x i64>* undef, align 4
+ store <4 x i64> %2, ptr undef, align 4
ret void
}
-define void @sext_v4i32_to_v4i64(<4 x i32>* %a) {
+define void @sext_v4i32_to_v4i64(ptr %a) {
; SSE2-LABEL: 'sext_v4i32_to_v4i64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a, align 16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, ptr %a, align 16
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = sext <4 x i32> %1 to <4 x i64>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'sext_v4i32_to_v4i64'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a, align 16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, ptr %a, align 16
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <4 x i32> %1 to <4 x i64>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i32>, <4 x i32>* %a
+ %1 = load <4 x i32>, ptr %a
%2 = sext <4 x i32> %1 to <4 x i64>
- store <4 x i64> %2, <4 x i64>* undef, align 4
+ store <4 x i64> %2, ptr undef, align 4
ret void
}
-define void @zext_v16i16_to_v16i32(<16 x i16>* %a) {
+define void @zext_v16i16_to_v16i32(ptr %a) {
; CHECK-LABEL: 'zext_v16i16_to_v16i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, <16 x i16>* %a, align 32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, ptr %a, align 32
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = zext <16 x i16> %1 to <16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <16 x i16>, <16 x i16>* %a
+ %1 = load <16 x i16>, ptr %a
%2 = zext <16 x i16> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* undef, align 4
+ store <16 x i32> %2, ptr undef, align 4
ret void
}
-define void @sext_v16i16_to_v16i32(<16 x i16>* %a) {
+define void @sext_v16i16_to_v16i32(ptr %a) {
; SSE2-LABEL: 'sext_v16i16_to_v16i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, <16 x i16>* %a, align 32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, ptr %a, align 32
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = sext <16 x i16> %1 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'sext_v16i16_to_v16i32'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, <16 x i16>* %a, align 32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, ptr %a, align 32
; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = sext <16 x i16> %1 to <16 x i32>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <16 x i16>, <16 x i16>* %a
+ %1 = load <16 x i16>, ptr %a
%2 = sext <16 x i16> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* undef, align 4
+ store <16 x i32> %2, ptr undef, align 4
ret void
}
-define void @zext_v8i16_to_v8i32(<8 x i16>* %a) {
+define void @zext_v8i16_to_v8i32(ptr %a) {
; CHECK-LABEL: 'zext_v8i16_to_v8i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, <8 x i16>* %a, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, ptr %a, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <8 x i16> %1 to <8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <8 x i16>, <8 x i16>* %a
+ %1 = load <8 x i16>, ptr %a
%2 = zext <8 x i16> %1 to <8 x i32>
- store <8 x i32> %2, <8 x i32>* undef, align 4
+ store <8 x i32> %2, ptr undef, align 4
ret void
}
-define void @sext_v8i16_to_v8i32(<8 x i16>* %a) {
+define void @sext_v8i16_to_v8i32(ptr %a) {
; SSE2-LABEL: 'sext_v8i16_to_v8i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, <8 x i16>* %a, align 16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, ptr %a, align 16
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = sext <8 x i16> %1 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'sext_v8i16_to_v8i32'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, <8 x i16>* %a, align 16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, ptr %a, align 16
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <8 x i16> %1 to <8 x i32>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <8 x i16>, <8 x i16>* %a
+ %1 = load <8 x i16>, ptr %a
%2 = sext <8 x i16> %1 to <8 x i32>
- store <8 x i32> %2, <8 x i32>* undef, align 4
+ store <8 x i32> %2, ptr undef, align 4
ret void
}
-define void @zext_v4i16_to_v4i32(<4 x i16>* %a) {
+define void @zext_v4i16_to_v4i32(ptr %a) {
; CHECK-LABEL: 'zext_v4i16_to_v4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, ptr %a, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <4 x i16> %1 to <4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i16>, <4 x i16>* %a
+ %1 = load <4 x i16>, ptr %a
%2 = zext <4 x i16> %1 to <4 x i32>
- store <4 x i32> %2, <4 x i32>* undef, align 4
+ store <4 x i32> %2, ptr undef, align 4
ret void
}
-define void @sext_v4i16_to_v4i32(<4 x i16>* %a) {
+define void @sext_v4i16_to_v4i32(ptr %a) {
; SSE2-LABEL: 'sext_v4i16_to_v4i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a, align 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, ptr %a, align 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <4 x i16> %1 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'sext_v4i16_to_v4i32'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a, align 8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, ptr %a, align 8
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <4 x i16> %1 to <4 x i32>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i16>, <4 x i16>* %a
+ %1 = load <4 x i16>, ptr %a
%2 = sext <4 x i16> %1 to <4 x i32>
- store <4 x i32> %2, <4 x i32>* undef, align 4
+ store <4 x i32> %2, ptr undef, align 4
ret void
}
-define void @zext_v16i8_to_v16i32(<16 x i8>* %a) {
+define void @zext_v16i8_to_v16i32(ptr %a) {
; SSE2-LABEL: 'zext_v16i8_to_v16i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a, align 16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, ptr %a, align 16
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = zext <16 x i8> %1 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'zext_v16i8_to_v16i32'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a, align 16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, ptr %a, align 16
; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = zext <16 x i8> %1 to <16 x i32>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <16 x i8>, <16 x i8>* %a
+ %1 = load <16 x i8>, ptr %a
%2 = zext <16 x i8> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* undef, align 4
+ store <16 x i32> %2, ptr undef, align 4
ret void
}
-define void @sext_v16i8_to_v16i32(<16 x i8>* %a) {
+define void @sext_v16i8_to_v16i32(ptr %a) {
; SSE2-LABEL: 'sext_v16i8_to_v16i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a, align 16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, ptr %a, align 16
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = sext <16 x i8> %1 to <16 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'sext_v16i8_to_v16i32'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a, align 16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, ptr %a, align 16
; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = sext <16 x i8> %1 to <16 x i32>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <16 x i8>, <16 x i8>* %a
+ %1 = load <16 x i8>, ptr %a
%2 = sext <16 x i8> %1 to <16 x i32>
- store <16 x i32> %2, <16 x i32>* undef, align 4
+ store <16 x i32> %2, ptr undef, align 4
ret void
}
-define void @zext_v8i8_to_v8i32(<8 x i8>* %a) {
+define void @zext_v8i8_to_v8i32(ptr %a) {
; SSE2-LABEL: 'zext_v8i8_to_v8i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a, align 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, ptr %a, align 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = zext <8 x i8> %1 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'zext_v8i8_to_v8i32'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a, align 8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, ptr %a, align 8
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <8 x i8> %1 to <8 x i32>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <8 x i8>, <8 x i8>* %a
+ %1 = load <8 x i8>, ptr %a
%2 = zext <8 x i8> %1 to <8 x i32>
- store <8 x i32> %2, <8 x i32>* undef, align 4
+ store <8 x i32> %2, ptr undef, align 4
ret void
}
-define void @sext_v8i8_to_v8i32(<8 x i8>* %a) {
+define void @sext_v8i8_to_v8i32(ptr %a) {
; SSE2-LABEL: 'sext_v8i8_to_v8i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a, align 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, ptr %a, align 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = sext <8 x i8> %1 to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'sext_v8i8_to_v8i32'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a, align 8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, ptr %a, align 8
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <8 x i8> %1 to <8 x i32>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <8 x i8>, <8 x i8>* %a
+ %1 = load <8 x i8>, ptr %a
%2 = sext <8 x i8> %1 to <8 x i32>
- store <8 x i32> %2, <8 x i32>* undef, align 4
+ store <8 x i32> %2, ptr undef, align 4
ret void
}
-define void @zext_v4i8_to_v4i32(<4 x i8>* %a) {
+define void @zext_v4i8_to_v4i32(ptr %a) {
; SSE2-LABEL: 'zext_v4i8_to_v4i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, ptr %a, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <4 x i8> %1 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'zext_v4i8_to_v4i32'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, ptr %a, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <4 x i8> %1 to <4 x i32>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i8>, <4 x i8>* %a
+ %1 = load <4 x i8>, ptr %a
%2 = zext <4 x i8> %1 to <4 x i32>
- store <4 x i32> %2, <4 x i32>* undef, align 4
+ store <4 x i32> %2, ptr undef, align 4
ret void
}
-define void @sext_v4i8_to_v4i32(<4 x i8>* %a) {
+define void @sext_v4i8_to_v4i32(ptr %a) {
; SSE2-LABEL: 'sext_v4i8_to_v4i32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, ptr %a, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = sext <4 x i8> %1 to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'sext_v4i8_to_v4i32'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, ptr %a, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <4 x i8> %1 to <4 x i32>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, <4 x i32>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i8>, <4 x i8>* %a
+ %1 = load <4 x i8>, ptr %a
%2 = sext <4 x i8> %1 to <4 x i32>
- store <4 x i32> %2, <4 x i32>* undef, align 4
+ store <4 x i32> %2, ptr undef, align 4
ret void
}
-define void @zext_v16i8_to_v16i16(<16 x i8>* %a) {
+define void @zext_v16i8_to_v16i16(ptr %a) {
; CHECK-LABEL: 'zext_v16i8_to_v16i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, ptr %a, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <16 x i8> %1 to <16 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, <16 x i16>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <16 x i8>, <16 x i8>* %a
+ %1 = load <16 x i8>, ptr %a
%2 = zext <16 x i8> %1 to <16 x i16>
- store <16 x i16> %2, <16 x i16>* undef, align 4
+ store <16 x i16> %2, ptr undef, align 4
ret void
}
-define void @sext_v16i8_to_v16i16(<16 x i8>* %a) {
+define void @sext_v16i8_to_v16i16(ptr %a) {
; SSE2-LABEL: 'sext_v16i8_to_v16i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a, align 16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, ptr %a, align 16
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = sext <16 x i8> %1 to <16 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, <16 x i16>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'sext_v16i8_to_v16i16'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a, align 16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, ptr %a, align 16
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <16 x i8> %1 to <16 x i16>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, <16 x i16>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <16 x i8>, <16 x i8>* %a
+ %1 = load <16 x i8>, ptr %a
%2 = sext <16 x i8> %1 to <16 x i16>
- store <16 x i16> %2, <16 x i16>* undef, align 4
+ store <16 x i16> %2, ptr undef, align 4
ret void
}
-define void @zext_v8i8_to_v8i16(<8 x i8>* %a) {
+define void @zext_v8i8_to_v8i16(ptr %a) {
; CHECK-LABEL: 'zext_v8i8_to_v8i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, ptr %a, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <8 x i8> %1 to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %2, <8 x i16>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <8 x i8>, <8 x i8>* %a
+ %1 = load <8 x i8>, ptr %a
%2 = zext <8 x i8> %1 to <8 x i16>
- store <8 x i16> %2, <8 x i16>* undef, align 4
+ store <8 x i16> %2, ptr undef, align 4
ret void
}
-define void @sext_v8i8_to_v8i16(<8 x i8>* %a) {
+define void @sext_v8i8_to_v8i16(ptr %a) {
; SSE2-LABEL: 'sext_v8i8_to_v8i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a, align 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, ptr %a, align 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <8 x i8> %1 to <8 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %2, <8 x i16>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'sext_v8i8_to_v8i16'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a, align 8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, ptr %a, align 8
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <8 x i8> %1 to <8 x i16>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %2, <8 x i16>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <8 x i8>, <8 x i8>* %a
+ %1 = load <8 x i8>, ptr %a
%2 = sext <8 x i8> %1 to <8 x i16>
- store <8 x i16> %2, <8 x i16>* undef, align 4
+ store <8 x i16> %2, ptr undef, align 4
ret void
}
-define void @zext_v4i8_to_v4i16(<4 x i8>* %a) {
+define void @zext_v4i8_to_v4i16(ptr %a) {
; CHECK-LABEL: 'zext_v4i8_to_v4i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, ptr %a, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = zext <4 x i8> %1 to <4 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, <4 x i16>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i8>, <4 x i8>* %a
+ %1 = load <4 x i8>, ptr %a
%2 = zext <4 x i8> %1 to <4 x i16>
- store <4 x i16> %2, <4 x i16>* undef, align 4
+ store <4 x i16> %2, ptr undef, align 4
ret void
}
-define void @sext_v4i8_to_v4i16(<4 x i8>* %a) {
+define void @sext_v4i8_to_v4i16(ptr %a) {
; SSE2-LABEL: 'sext_v4i8_to_v4i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, ptr %a, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <4 x i8> %1 to <4 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, <4 x i16>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'sext_v4i8_to_v4i16'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, ptr %a, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = sext <4 x i8> %1 to <4 x i16>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, <4 x i16>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i8>, <4 x i8>* %a
+ %1 = load <4 x i8>, ptr %a
%2 = sext <4 x i8> %1 to <4 x i16>
- store <4 x i16> %2, <4 x i16>* undef, align 4
+ store <4 x i16> %2, ptr undef, align 4
ret void
}
-define void @truncate_v16i32_to_v16i16(<16 x i32>* %a) {
+define void @truncate_v16i32_to_v16i16(ptr %a) {
; CHECK-LABEL: 'truncate_v16i32_to_v16i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load <16 x i32>, <16 x i32>* %a, align 64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load <16 x i32>, ptr %a, align 64
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %2 = trunc <16 x i32> %1 to <16 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, <16 x i16>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <16 x i32>, <16 x i32>* %a
+ %1 = load <16 x i32>, ptr %a
%2 = trunc <16 x i32> %1 to <16 x i16>
- store <16 x i16> %2, <16 x i16>* undef, align 4
+ store <16 x i16> %2, ptr undef, align 4
ret void
}
-define void @truncate_v8i32_to_v8i16(<8 x i32>* %a) {
+define void @truncate_v8i32_to_v8i16(ptr %a) {
; CHECK-LABEL: 'truncate_v8i32_to_v8i16'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <8 x i32>, <8 x i32>* %a, align 32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <8 x i32>, ptr %a, align 32
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = trunc <8 x i32> %1 to <8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %2, <8 x i16>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <8 x i32>, <8 x i32>* %a
+ %1 = load <8 x i32>, ptr %a
%2 = trunc <8 x i32> %1 to <8 x i16>
- store <8 x i16> %2, <8 x i16>* undef, align 4
+ store <8 x i16> %2, ptr undef, align 4
ret void
}
-define void @truncate_v4i32_to_v4i16(<4 x i32>* %a) {
+define void @truncate_v4i32_to_v4i16(ptr %a) {
; SSE2-LABEL: 'truncate_v4i32_to_v4i16'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a, align 16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, ptr %a, align 16
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = trunc <4 x i32> %1 to <4 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, <4 x i16>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'truncate_v4i32_to_v4i16'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a, align 16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, ptr %a, align 16
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <4 x i32> %1 to <4 x i16>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, <4 x i16>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i32>, <4 x i32>* %a
+ %1 = load <4 x i32>, ptr %a
%2 = trunc <4 x i32> %1 to <4 x i16>
- store <4 x i16> %2, <4 x i16>* undef, align 4
+ store <4 x i16> %2, ptr undef, align 4
ret void
}
-define void @truncate_v16i32_to_v16i8(<16 x i32>* %a) {
+define void @truncate_v16i32_to_v16i8(ptr %a) {
; CHECK-LABEL: 'truncate_v16i32_to_v16i8'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load <16 x i32>, <16 x i32>* %a, align 64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = load <16 x i32>, ptr %a, align 64
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = trunc <16 x i32> %1 to <16 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %2, <16 x i8>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <16 x i32>, <16 x i32>* %a
+ %1 = load <16 x i32>, ptr %a
%2 = trunc <16 x i32> %1 to <16 x i8>
- store <16 x i8> %2, <16 x i8>* undef, align 4
+ store <16 x i8> %2, ptr undef, align 4
ret void
}
-define void @truncate_v8i32_to_v8i8(<8 x i32>* %a) {
+define void @truncate_v8i32_to_v8i8(ptr %a) {
; SSE2-LABEL: 'truncate_v8i32_to_v8i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <8 x i32>, <8 x i32>* %a, align 32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <8 x i32>, ptr %a, align 32
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = trunc <8 x i32> %1 to <8 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %2, <8 x i8>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'truncate_v8i32_to_v8i8'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <8 x i32>, <8 x i32>* %a, align 32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <8 x i32>, ptr %a, align 32
; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = trunc <8 x i32> %1 to <8 x i8>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %2, <8 x i8>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <8 x i32>, <8 x i32>* %a
+ %1 = load <8 x i32>, ptr %a
%2 = trunc <8 x i32> %1 to <8 x i8>
- store <8 x i8> %2, <8 x i8>* undef, align 4
+ store <8 x i8> %2, ptr undef, align 4
ret void
}
-define void @truncate_v4i32_to_v4i8(<4 x i32>* %a) {
+define void @truncate_v4i32_to_v4i8(ptr %a) {
; SSE2-LABEL: 'truncate_v4i32_to_v4i8'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a, align 16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, ptr %a, align 16
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = trunc <4 x i32> %1 to <4 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, <4 x i8>* undef, align 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, ptr undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'truncate_v4i32_to_v4i8'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a, align 16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, ptr %a, align 16
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <4 x i32> %1 to <4 x i8>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, <4 x i8>* undef, align 4
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, ptr undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i32>, <4 x i32>* %a
+ %1 = load <4 x i32>, ptr %a
%2 = trunc <4 x i32> %1 to <4 x i8>
- store <4 x i8> %2, <4 x i8>* undef, align 4
+ store <4 x i8> %2, ptr undef, align 4
ret void
}
-define void @truncate_v16i16_to_v16i8(<16 x i16>* %a) {
+define void @truncate_v16i16_to_v16i8(ptr %a) {
; CHECK-LABEL: 'truncate_v16i16_to_v16i8'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, <16 x i16>* %a, align 32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, ptr %a, align 32
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = trunc <16 x i16> %1 to <16 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %2, <16 x i8>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <16 x i16>, <16 x i16>* %a
+ %1 = load <16 x i16>, ptr %a
%2 = trunc <16 x i16> %1 to <16 x i8>
- store <16 x i8> %2, <16 x i8>* undef, align 4
+ store <16 x i8> %2, ptr undef, align 4
ret void
}
-define void @truncate_v8i16_to_v8i8(<8 x i16>* %a) {
+define void @truncate_v8i16_to_v8i8(ptr %a) {
; CHECK-LABEL: 'truncate_v8i16_to_v8i8'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, <8 x i16>* %a, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, ptr %a, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <8 x i16> %1 to <8 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %2, <8 x i8>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <8 x i16>, <8 x i16>* %a
+ %1 = load <8 x i16>, ptr %a
%2 = trunc <8 x i16> %1 to <8 x i8>
- store <8 x i8> %2, <8 x i8>* undef, align 4
+ store <8 x i8> %2, ptr undef, align 4
ret void
}
-define void @truncate_v4i16_to_v4i8(<4 x i16>* %a) {
+define void @truncate_v4i16_to_v4i8(ptr %a) {
; CHECK-LABEL: 'truncate_v4i16_to_v4i8'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, ptr %a, align 8
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <4 x i16> %1 to <4 x i8>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, <4 x i8>* undef, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, ptr undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
- %1 = load <4 x i16>, <4 x i16>* %a
+ %1 = load <4 x i16>, ptr %a
%2 = trunc <4 x i16> %1 to <4 x i8>
- store <4 x i8> %2, <4 x i8>* undef, align 4
+ store <4 x i8> %2, ptr undef, align 4
ret void
}
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = shl nsw i64 %indvars.iv, 1
- %arrayidx = getelementptr inbounds [10240 x i16], [10240 x i16]* @A, i64 0, i64 %0
- %1 = load i16, i16* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds [10240 x i16], [10240 x i16]* @B, i64 0, i64 %indvars.iv
- store i16 %1, i16* %arrayidx2, align 2
+ %arrayidx = getelementptr inbounds [10240 x i16], ptr @A, i64 0, i64 %0
+ %1 = load i16, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i16], ptr @B, i64 0, i64 %indvars.iv
+ store i16 %1, ptr %arrayidx2, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = mul nsw i64 %indvars.iv, 3
- %arrayidx = getelementptr inbounds [10240 x i16], [10240 x i16]* @A, i64 0, i64 %0
- %1 = load i16, i16* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds [10240 x i16], [10240 x i16]* @B, i64 0, i64 %indvars.iv
- store i16 %1, i16* %arrayidx2, align 2
+ %arrayidx = getelementptr inbounds [10240 x i16], ptr @A, i64 0, i64 %0
+ %1 = load i16, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i16], ptr @B, i64 0, i64 %indvars.iv
+ store i16 %1, ptr %arrayidx2, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = shl nsw i64 %indvars.iv, 2
- %arrayidx = getelementptr inbounds [10240 x i16], [10240 x i16]* @A, i64 0, i64 %0
- %1 = load i16, i16* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds [10240 x i16], [10240 x i16]* @B, i64 0, i64 %indvars.iv
- store i16 %1, i16* %arrayidx2, align 2
+ %arrayidx = getelementptr inbounds [10240 x i16], ptr @A, i64 0, i64 %0
+ %1 = load i16, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i16], ptr @B, i64 0, i64 %indvars.iv
+ store i16 %1, ptr %arrayidx2, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = mul nsw i64 %indvars.iv, 5
- %arrayidx = getelementptr inbounds [10240 x i16], [10240 x i16]* @A, i64 0, i64 %0
- %1 = load i16, i16* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds [10240 x i16], [10240 x i16]* @B, i64 0, i64 %indvars.iv
- store i16 %1, i16* %arrayidx2, align 2
+ %arrayidx = getelementptr inbounds [10240 x i16], ptr @A, i64 0, i64 %0
+ %1 = load i16, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i16], ptr @B, i64 0, i64 %indvars.iv
+ store i16 %1, ptr %arrayidx2, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = shl nsw i64 %indvars.iv, 1
- %arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %0
- %1 = load i32, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
- store i32 %1, i32* %arrayidx2, align 2
+ %arrayidx = getelementptr inbounds [10240 x i32], ptr @A, i64 0, i64 %0
+ %1 = load i32, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i32], ptr @B, i64 0, i64 %indvars.iv
+ store i32 %1, ptr %arrayidx2, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = mul nsw i64 %indvars.iv, 3
- %arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %0
- %1 = load i32, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
- store i32 %1, i32* %arrayidx2, align 2
+ %arrayidx = getelementptr inbounds [10240 x i32], ptr @A, i64 0, i64 %0
+ %1 = load i32, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i32], ptr @B, i64 0, i64 %indvars.iv
+ store i32 %1, ptr %arrayidx2, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = shl nsw i64 %indvars.iv, 2
- %arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %0
- %1 = load i32, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
- store i32 %1, i32* %arrayidx2, align 2
+ %arrayidx = getelementptr inbounds [10240 x i32], ptr @A, i64 0, i64 %0
+ %1 = load i32, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i32], ptr @B, i64 0, i64 %indvars.iv
+ store i32 %1, ptr %arrayidx2, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = mul nsw i64 %indvars.iv, 5
- %arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %0
- %1 = load i32, i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
- store i32 %1, i32* %arrayidx2, align 2
+ %arrayidx = getelementptr inbounds [10240 x i32], ptr @A, i64 0, i64 %0
+ %1 = load i32, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [10240 x i32], ptr @B, i64 0, i64 %indvars.iv
+ store i32 %1, ptr %arrayidx2, align 2
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = shl nsw i64 %indvars.iv, 1
- %arrayidx = getelementptr inbounds [10240 x i64], [10240 x i64]* @A, i64 0, i64 %0
- %1 = load i64, i64* %arrayidx, align 16
- %arrayidx2 = getelementptr inbounds [10240 x i64], [10240 x i64]* @B, i64 0, i64 %indvars.iv
- store i64 %1, i64* %arrayidx2, align 8
+ %arrayidx = getelementptr inbounds [10240 x i64], ptr @A, i64 0, i64 %0
+ %1 = load i64, ptr %arrayidx, align 16
+ %arrayidx2 = getelementptr inbounds [10240 x i64], ptr @B, i64 0, i64 %indvars.iv
+ store i64 %1, ptr %arrayidx2, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = mul nsw i64 %indvars.iv, 3
- %arrayidx = getelementptr inbounds [10240 x i64], [10240 x i64]* @A, i64 0, i64 %0
- %1 = load i64, i64* %arrayidx, align 16
- %arrayidx2 = getelementptr inbounds [10240 x i64], [10240 x i64]* @B, i64 0, i64 %indvars.iv
- store i64 %1, i64* %arrayidx2, align 8
+ %arrayidx = getelementptr inbounds [10240 x i64], ptr @A, i64 0, i64 %0
+ %1 = load i64, ptr %arrayidx, align 16
+ %arrayidx2 = getelementptr inbounds [10240 x i64], ptr @B, i64 0, i64 %indvars.iv
+ store i64 %1, ptr %arrayidx2, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = mul nsw i64 %indvars.iv, 4
- %arrayidx = getelementptr inbounds [10240 x i64], [10240 x i64]* @A, i64 0, i64 %0
- %1 = load i64, i64* %arrayidx, align 16
- %arrayidx2 = getelementptr inbounds [10240 x i64], [10240 x i64]* @B, i64 0, i64 %indvars.iv
- store i64 %1, i64* %arrayidx2, align 8
+ %arrayidx = getelementptr inbounds [10240 x i64], ptr @A, i64 0, i64 %0
+ %1 = load i64, ptr %arrayidx, align 16
+ %arrayidx2 = getelementptr inbounds [10240 x i64], ptr @B, i64 0, i64 %indvars.iv
+ store i64 %1, ptr %arrayidx2, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = shl nsw i64 %indvars.iv, 1
- %arrayidx = getelementptr inbounds [10240 x i8], [10240 x i8]* @A, i64 0, i64 %0
- %1 = load i8, i8* %arrayidx, align 2
- %arrayidx2 = getelementptr inbounds [10240 x i8], [10240 x i8]* @B, i64 0, i64 %indvars.iv
- store i8 %1, i8* %arrayidx2, align 1
+ %arrayidx = getelementptr inbounds [10240 x i8], ptr @A, i64 0, i64 %0
+ %1 = load i8, ptr %arrayidx, align 2
+ %arrayidx2 = getelementptr inbounds [10240 x i8], ptr @B, i64 0, i64 %indvars.iv
+ store i8 %1, ptr %arrayidx2, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = mul nsw i64 %indvars.iv, 3
- %arrayidx = getelementptr inbounds [10240 x i8], [10240 x i8]* @A, i64 0, i64 %0
- %1 = load i8, i8* %arrayidx, align 2
- %arrayidx2 = getelementptr inbounds [10240 x i8], [10240 x i8]* @B, i64 0, i64 %indvars.iv
- store i8 %1, i8* %arrayidx2, align 1
+ %arrayidx = getelementptr inbounds [10240 x i8], ptr @A, i64 0, i64 %0
+ %1 = load i8, ptr %arrayidx, align 2
+ %arrayidx2 = getelementptr inbounds [10240 x i8], ptr @B, i64 0, i64 %indvars.iv
+ store i8 %1, ptr %arrayidx2, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = shl nsw i64 %indvars.iv, 2
- %arrayidx = getelementptr inbounds [10240 x i8], [10240 x i8]* @A, i64 0, i64 %0
- %1 = load i8, i8* %arrayidx, align 2
- %arrayidx2 = getelementptr inbounds [10240 x i8], [10240 x i8]* @B, i64 0, i64 %indvars.iv
- store i8 %1, i8* %arrayidx2, align 1
+ %arrayidx = getelementptr inbounds [10240 x i8], ptr @A, i64 0, i64 %0
+ %1 = load i8, ptr %arrayidx, align 2
+ %arrayidx2 = getelementptr inbounds [10240 x i8], ptr @B, i64 0, i64 %indvars.iv
+ store i8 %1, ptr %arrayidx2, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = mul nsw i64 %indvars.iv, 5
- %arrayidx = getelementptr inbounds [10240 x i8], [10240 x i8]* @A, i64 0, i64 %0
- %1 = load i8, i8* %arrayidx, align 2
- %arrayidx2 = getelementptr inbounds [10240 x i8], [10240 x i8]* @B, i64 0, i64 %indvars.iv
- store i8 %1, i8* %arrayidx2, align 1
+ %arrayidx = getelementptr inbounds [10240 x i8], ptr @A, i64 0, i64 %0
+ %1 = load i8, ptr %arrayidx, align 2
+ %arrayidx2 = getelementptr inbounds [10240 x i8], ptr @B, i64 0, i64 %indvars.iv
+ store i8 %1, ptr %arrayidx2, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
%struct.S = type { [1000 x i32] }
-declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
-define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){
+define <4 x i32> @foov(<4 x ptr> %s, i64 %base){
%temp = insertelement <4 x i64> poison, i64 %base, i32 0
%vector = shufflevector <4 x i64> %temp, <4 x i64> poison, <4 x i32> zeroinitializer
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds %struct.S
- %B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %B = getelementptr inbounds %struct.S, <4 x ptr> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32]
- %arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector
- %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %arrayidx = getelementptr inbounds [1000 x i32], <4 x ptr> %B, <4 x i64> zeroinitializer, <4 x i64> %vector
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}
; Cost-model test: a vector-of-pointers GEP chain feeding a masked gather.
; This variant builds the index splat from `undef` operands.
; Lines prefixed with '-' are the typed-pointer spelling being removed; lines
; prefixed with '+' are the opaque-pointer (`ptr`) replacements.
%struct.S = type { [1000 x i32] }
-declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
; @foov takes four pointers (%s) and a scalar index (%base) and returns the
; four i32 values gathered from element %base of each struct's array.
-define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){
+define <4 x i32> @foov(<4 x ptr> %s, i64 %base){
; Splat %base: insert it into lane 0 of an undef vector, then shuffle with an
; all-zero mask so every lane reads lane 0.
%temp = insertelement <4 x i64> undef, i64 %base, i32 0
%vector = shufflevector <4 x i64> %temp, <4 x i64> undef, <4 x i32> zeroinitializer
; First GEP: all-zero indices into %struct.S. The check expects cost 0.
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds %struct.S
- %B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %B = getelementptr inbounds %struct.S, <4 x ptr> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
; Second GEP: index the [1000 x i32] array with the splatted %base. The check
; also expects cost 0.
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32]
- %arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector
- %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ %arrayidx = getelementptr inbounds [1000 x i32], <4 x ptr> %B, <4 x i64> zeroinitializer, <4 x i64> %vector
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}
; @trivially_free calls a series of intrinsics with undef operands. Both check
; blocks (CHECK-SIZE and CHECK-THROUGHPUT) expect an estimated cost of 0 for
; every listed intrinsic call and a cost of 1 for the final `ret`.
; Lines prefixed with '-' are the typed-pointer spelling being removed; lines
; prefixed with '+' are the opaque-pointer (`ptr`) replacements.
; NOTE(review): the check lines use longer intrinsic names than the IR body
; (e.g. @llvm.annotation.i32.p0 vs. @llvm.annotation.i32, and
; @llvm.var.annotation.p0.p0 vs. @llvm.var.annotation), and print `!0` where
; the body passes `!4` -- presumably the names are remangled and the metadata
; renumbered when the module is parsed/printed; confirm before hand-editing.
define i32 @trivially_free() {
; CHECK-SIZE-LABEL: 'trivially_free'
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32.p0i8(i32 undef, i8* undef, i8* undef, i32 undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32.p0(i32 undef, ptr undef, ptr undef, i32 undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.assume(i1 undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.experimental.noalias.scope.decl(metadata !0)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.sideeffect()
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call ptr @llvm.invariant.start.p0(i64 1, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0(ptr undef, i64 1, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0i8(i64 1, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0i8(i64 1, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 true, i1 true, i1 true)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call i8* @llvm.ptr.annotation.p0i8.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0i8.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(i64 1, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(i64 1, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 true, i1 true, i1 true)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call ptr @llvm.ptr.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-THROUGHPUT-LABEL: 'trivially_free'
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32.p0i8(i32 undef, i8* undef, i8* undef, i32 undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32.p0(i32 undef, ptr undef, ptr undef, i32 undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.assume(i1 undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.experimental.noalias.scope.decl(metadata !0)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.sideeffect()
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call ptr @llvm.invariant.start.p0(i64 1, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0(ptr undef, i64 1, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0i8(i64 1, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0i8(i64 1, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 true, i1 true, i1 true)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call i8* @llvm.ptr.annotation.p0i8.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0i8.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(i64 1, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(i64 1, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 true, i1 true, i1 true)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call ptr @llvm.ptr.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
- %a0 = call i32 @llvm.annotation.i32(i32 undef, i8* undef, i8* undef, i32 undef)
+ %a0 = call i32 @llvm.annotation.i32(i32 undef, ptr undef, ptr undef, i32 undef)
call void @llvm.assume(i1 undef)
call void @llvm.experimental.noalias.scope.decl(metadata !4)
call void @llvm.sideeffect()
; The three llvm.dbg.* calls below have no matching lines in either check
; block above.
- call void @llvm.dbg.declare(metadata i8** undef, metadata !0, metadata !DIExpression())
+ call void @llvm.dbg.declare(metadata ptr undef, metadata !0, metadata !DIExpression())
call void @llvm.dbg.value(metadata i64 undef, i64 undef, metadata !DIExpression(), metadata !DIExpression())
call void @llvm.dbg.label(metadata !2)
- %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef)
- call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef)
- %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef)
- %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef)
+ %a1 = call ptr @llvm.invariant.start.p0(i64 1, ptr undef)
+ call void @llvm.invariant.end.p0(ptr undef, i64 1, ptr undef)
+ %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
+ %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
%a4 = call i1 @llvm.is.constant.i32(i32 undef)
- call void @llvm.lifetime.start.p0i8(i64 1, i8* undef)
- call void @llvm.lifetime.end.p0i8(i64 1, i8* undef)
- %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 1, i1 1, i1 1)
- %a6 = call i8* @llvm.ptr.annotation.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
- call void @llvm.var.annotation(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
+ call void @llvm.lifetime.start.p0(i64 1, ptr undef)
+ call void @llvm.lifetime.end.p0(i64 1, ptr undef)
+ %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 1, i1 1, i1 1)
+ %a6 = call ptr @llvm.ptr.annotation.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
+ call void @llvm.var.annotation(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
ret i32 undef
}
; Declarations of the intrinsics called by @trivially_free.
; Lines prefixed with '-' are the typed-pointer spelling being removed; lines
; prefixed with '+' are the opaque-pointer (`ptr`) replacements. Declarations
; without a prefix take no pointer arguments and are unchanged.
-declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32)
+declare i32 @llvm.annotation.i32(i32, ptr, ptr, i32)
declare void @llvm.assume(i1)
declare void @llvm.experimental.noalias.scope.decl(metadata)
declare void @llvm.sideeffect()
declare void @llvm.dbg.declare(metadata, metadata, metadata)
declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
declare void @llvm.dbg.label(metadata)
-declare {}* @llvm.invariant.start.p0i8(i64, i8*)
-declare void @llvm.invariant.end.p0i8({}*, i64, i8*)
-declare i8* @llvm.launder.invariant.group.p0i8(i8*)
-declare i8* @llvm.strip.invariant.group.p0i8(i8*)
+declare ptr @llvm.invariant.start.p0(i64, ptr)
+declare void @llvm.invariant.end.p0(ptr, i64, ptr)
+declare ptr @llvm.launder.invariant.group.p0(ptr)
+declare ptr @llvm.strip.invariant.group.p0(ptr)
declare i1 @llvm.is.constant.i32(i32)
-declare void @llvm.lifetime.start.p0i8(i64, i8*)
-declare void @llvm.lifetime.end.p0i8(i64, i8*)
-declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)
-declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32, i8*)
-declare void @llvm.var.annotation(i8*, i8*, i8*, i32, i8*)
+declare void @llvm.lifetime.start.p0(i64, ptr)
+declare void @llvm.lifetime.end.p0(i64, ptr)
+declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
+declare ptr @llvm.ptr.annotation.p0(ptr, ptr, ptr, i32, ptr)
+declare void @llvm.var.annotation(ptr, ptr, ptr, i32, ptr)
; Local-variable metadata node passed as the second operand of the
; llvm.dbg.declare call in @trivially_free.
!0 = !DILocalVariable(scope: !1)
; @trivially_free calls a series of intrinsics with undef operands. Both check
; blocks (CHECK-SIZE and CHECK-THROUGHPUT) expect an estimated cost of 0 for
; every listed intrinsic call and a cost of 1 for the final `ret`.
; Lines prefixed with '-' are the typed-pointer spelling being removed; lines
; prefixed with '+' are the opaque-pointer (`ptr`) replacements.
; NOTE(review): the check lines use longer intrinsic names than the IR body
; (e.g. @llvm.annotation.i32.p0 vs. @llvm.annotation.i32, and
; @llvm.var.annotation.p0.p0 vs. @llvm.var.annotation), and print `!0` where
; the body passes `!4` -- presumably the names are remangled and the metadata
; renumbered when the module is parsed/printed; confirm before hand-editing.
define i32 @trivially_free() {
; CHECK-SIZE-LABEL: 'trivially_free'
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32.p0i8(i32 undef, i8* undef, i8* undef, i32 undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32.p0(i32 undef, ptr undef, ptr undef, i32 undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.assume(i1 undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.experimental.noalias.scope.decl(metadata !0)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.sideeffect()
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call ptr @llvm.invariant.start.p0(i64 1, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0(ptr undef, i64 1, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0i8(i64 1, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0i8(i64 1, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 true, i1 true, i1 true)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call i8* @llvm.ptr.annotation.p0i8.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
-; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0i8.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(i64 1, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(i64 1, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 true, i1 true, i1 true)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call ptr @llvm.ptr.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; CHECK-THROUGHPUT-LABEL: 'trivially_free'
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32.p0i8(i32 undef, i8* undef, i8* undef, i32 undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = call i32 @llvm.annotation.i32.p0(i32 undef, ptr undef, ptr undef, i32 undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.assume(i1 undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.experimental.noalias.scope.decl(metadata !0)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.sideeffect()
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = call ptr @llvm.invariant.start.p0(i64 1, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.invariant.end.p0(ptr undef, i64 1, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = call i1 @llvm.is.constant.i32(i32 undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0i8(i64 1, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0i8(i64 1, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 true, i1 true, i1 true)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call i8* @llvm.ptr.annotation.p0i8.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
-; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0i8.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.start.p0(i64 1, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.lifetime.end.p0(i64 1, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 true, i1 true, i1 true)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a6 = call ptr @llvm.ptr.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: call void @llvm.var.annotation.p0.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
- %a0 = call i32 @llvm.annotation.i32(i32 undef, i8* undef, i8* undef, i32 undef)
+ %a0 = call i32 @llvm.annotation.i32(i32 undef, ptr undef, ptr undef, i32 undef)
call void @llvm.assume(i1 undef)
call void @llvm.experimental.noalias.scope.decl(metadata !4)
call void @llvm.sideeffect()
; The three llvm.dbg.* calls below have no matching lines in either check
; block above.
- call void @llvm.dbg.declare(metadata i8** undef, metadata !0, metadata !DIExpression())
+ call void @llvm.dbg.declare(metadata ptr undef, metadata !0, metadata !DIExpression())
call void @llvm.dbg.value(metadata i64 undef, i64 undef, metadata !DIExpression(), metadata !DIExpression())
call void @llvm.dbg.label(metadata !2)
- %a1 = call {}* @llvm.invariant.start.p0i8(i64 1, i8* undef)
- call void @llvm.invariant.end.p0i8({}* undef, i64 1, i8* undef)
- %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* undef)
- %a3 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef)
+ %a1 = call ptr @llvm.invariant.start.p0(i64 1, ptr undef)
+ call void @llvm.invariant.end.p0(ptr undef, i64 1, ptr undef)
+ %a2 = call ptr @llvm.launder.invariant.group.p0(ptr undef)
+ %a3 = call ptr @llvm.strip.invariant.group.p0(ptr undef)
%a4 = call i1 @llvm.is.constant.i32(i32 undef)
- call void @llvm.lifetime.start.p0i8(i64 1, i8* undef)
- call void @llvm.lifetime.end.p0i8(i64 1, i8* undef)
- %a5 = call i64 @llvm.objectsize.i64.p0i8(i8* undef, i1 1, i1 1, i1 1)
- %a6 = call i8* @llvm.ptr.annotation.p0i8(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
- call void @llvm.var.annotation(i8* undef, i8* undef, i8* undef, i32 undef, i8* undef)
+ call void @llvm.lifetime.start.p0(i64 1, ptr undef)
+ call void @llvm.lifetime.end.p0(i64 1, ptr undef)
+ %a5 = call i64 @llvm.objectsize.i64.p0(ptr undef, i1 1, i1 1, i1 1)
+ %a6 = call ptr @llvm.ptr.annotation.p0(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
+ call void @llvm.var.annotation(ptr undef, ptr undef, ptr undef, i32 undef, ptr undef)
ret i32 undef
}
; Declarations of the intrinsics called by @trivially_free.
; Lines prefixed with '-' are the typed-pointer spelling being removed; lines
; prefixed with '+' are the opaque-pointer (`ptr`) replacements. Declarations
; without a prefix take no pointer arguments and are unchanged.
-declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32)
+declare i32 @llvm.annotation.i32(i32, ptr, ptr, i32)
declare void @llvm.assume(i1)
declare void @llvm.experimental.noalias.scope.decl(metadata)
declare void @llvm.sideeffect()
declare void @llvm.dbg.declare(metadata, metadata, metadata)
declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
declare void @llvm.dbg.label(metadata)
-declare {}* @llvm.invariant.start.p0i8(i64, i8*)
-declare void @llvm.invariant.end.p0i8({}*, i64, i8*)
-declare i8* @llvm.launder.invariant.group.p0i8(i8*)
-declare i8* @llvm.strip.invariant.group.p0i8(i8*)
+declare ptr @llvm.invariant.start.p0(i64, ptr)
+declare void @llvm.invariant.end.p0(ptr, i64, ptr)
+declare ptr @llvm.launder.invariant.group.p0(ptr)
+declare ptr @llvm.strip.invariant.group.p0(ptr)
declare i1 @llvm.is.constant.i32(i32)
-declare void @llvm.lifetime.start.p0i8(i64, i8*)
-declare void @llvm.lifetime.end.p0i8(i64, i8*)
-declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)
-declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32, i8*)
-declare void @llvm.var.annotation(i8*, i8*, i8*, i32, i8*)
+declare void @llvm.lifetime.start.p0(i64, ptr)
+declare void @llvm.lifetime.end.p0(i64, ptr)
+declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
+declare ptr @llvm.ptr.annotation.p0(ptr, ptr, ptr, i32, ptr)
+declare void @llvm.var.annotation(ptr, ptr, ptr, i32, ptr)
; Local-variable metadata node passed as the second operand of the
; llvm.dbg.declare call in @trivially_free.
!0 = !DILocalVariable(scope: !1)
ret i32 %e
}
; @addressing_mode_reg_reg loads one i8 from %a indexed by %b. The GEP element
; type is i8, so the address is formed from a base and an index with no
; additional scale factor; the check expects the GEP to cost 0.
; Lines prefixed with '-' are the typed-pointer spelling being removed; lines
; prefixed with '+' are the opaque-pointer (`ptr`) replacements.
-define i8 @addressing_mode_reg_reg(i8* %a, i32 %b) {
+define i8 @addressing_mode_reg_reg(ptr %a, i32 %b) {
; CHECK-LABEL: function 'addressing_mode_reg_reg'
- %p = getelementptr i8, i8* %a, i32 %b ; NoTTI accepts reg+reg addressing.
+ %p = getelementptr i8, ptr %a, i32 %b ; NoTTI accepts reg+reg addressing.
; CHECK: cost of 0 {{.*}} getelementptr
- %v = load i8, i8* %p
+ %v = load i8, ptr %p
ret i8 %v
}
; @addressing_mode_scaled_reg loads one i32 from %a indexed by %b. The GEP
; element type is i32, so the index is scaled by the element size; the check
; expects the GEP to cost 1.
; Lines prefixed with '-' are the typed-pointer spelling being removed; lines
; prefixed with '+' are the opaque-pointer (`ptr`) replacements.
; CHECK-LABEL: function 'addressing_mode_scaled_reg'
-define i32 @addressing_mode_scaled_reg(i32* %a, i32 %b) {
+define i32 @addressing_mode_scaled_reg(ptr %a, i32 %b) {
; NoTTI rejects reg+scale*reg addressing.
- %p = getelementptr i32, i32* %a, i32 %b
+ %p = getelementptr i32, ptr %a, i32 %b
; CHECK: cost of 1 {{.*}} getelementptr
- %v = load i32, i32* %p
+ %v = load i32, ptr %p
ret i32 %v
}