; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -opaque-pointers=0 -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-CUR %s
-; RUN: llc -opaque-pointers=0 -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-CUR %s
-; RUN: llc -opaque-pointers=0 -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-EX %s
-; RUN: llc -opaque-pointers=0 -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s
+; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-CUR %s
+; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-CUR %s
+; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-EX %s
+; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s
-define i8 @load_i8(i8* %ptr) {
+define i8 @load_i8(ptr %ptr) {
; CHECK-O0-LABEL: load_i8:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb (%rdi), %al
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzbl (%rdi), %eax
; CHECK-O3-NEXT: retq
- %v = load atomic i8, i8* %ptr unordered, align 1
+ %v = load atomic i8, ptr %ptr unordered, align 1
ret i8 %v
}
-define void @store_i8(i8* %ptr, i8 %v) {
+define void @store_i8(ptr %ptr, i8 %v) {
; CHECK-O0-LABEL: store_i8:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb %sil, %al
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movb %sil, (%rdi)
; CHECK-O3-NEXT: retq
- store atomic i8 %v, i8* %ptr unordered, align 1
+ store atomic i8 %v, ptr %ptr unordered, align 1
ret void
}
-define i16 @load_i16(i16* %ptr) {
+define i16 @load_i16(ptr %ptr) {
; CHECK-O0-LABEL: load_i16:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movw (%rdi), %ax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
; CHECK-O3-NEXT: retq
- %v = load atomic i16, i16* %ptr unordered, align 2
+ %v = load atomic i16, ptr %ptr unordered, align 2
ret i16 %v
}
-define void @store_i16(i16* %ptr, i16 %v) {
+define void @store_i16(ptr %ptr, i16 %v) {
; CHECK-O0-LABEL: store_i16:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movw %si, %ax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movw %si, (%rdi)
; CHECK-O3-NEXT: retq
- store atomic i16 %v, i16* %ptr unordered, align 2
+ store atomic i16 %v, ptr %ptr unordered, align 2
ret void
}
-define i32 @load_i32(i32* %ptr) {
+define i32 @load_i32(ptr %ptr) {
; CHECK-LABEL: load_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: retq
- %v = load atomic i32, i32* %ptr unordered, align 4
+ %v = load atomic i32, ptr %ptr unordered, align 4
ret i32 %v
}
-define void @store_i32(i32* %ptr, i32 %v) {
+define void @store_i32(ptr %ptr, i32 %v) {
; CHECK-LABEL: store_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: retq
- store atomic i32 %v, i32* %ptr unordered, align 4
+ store atomic i32 %v, ptr %ptr unordered, align 4
ret void
}
-define i64 @load_i64(i64* %ptr) {
+define i64 @load_i64(ptr %ptr) {
; CHECK-LABEL: load_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %ptr unordered, align 8
+ %v = load atomic i64, ptr %ptr unordered, align 8
ret i64 %v
}
-define void @store_i64(i64* %ptr, i64 %v) {
+define void @store_i64(ptr %ptr, i64 %v) {
; CHECK-LABEL: store_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, (%rdi)
; CHECK-NEXT: retq
- store atomic i64 %v, i64* %ptr unordered, align 8
+ store atomic i64 %v, ptr %ptr unordered, align 8
ret void
}
;; Start w/some clearly illegal ones.
; Must use a full width op, not a byte op
-define void @narrow_writeback_or(i64* %ptr) {
+define void @narrow_writeback_or(ptr %ptr) {
; CHECK-O0-LABEL: narrow_writeback_or:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: orq $7, (%rdi)
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %ptr unordered, align 8
+ %v = load atomic i64, ptr %ptr unordered, align 8
%v.new = or i64 %v, 7
- store atomic i64 %v.new, i64* %ptr unordered, align 8
+ store atomic i64 %v.new, ptr %ptr unordered, align 8
ret void
}
; Must use a full width op, not a byte op
-define void @narrow_writeback_and(i64* %ptr) {
+define void @narrow_writeback_and(ptr %ptr) {
; CHECK-O0-LABEL: narrow_writeback_and:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movl $4294967040, %eax # imm = 0xFFFFFF00
; CHECK-O3-NEXT: andq %rax, (%rdi)
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %ptr unordered, align 8
+ %v = load atomic i64, ptr %ptr unordered, align 8
%v.new = and i64 %v, 4294967040 ;; 0xFFFF_FF00
- store atomic i64 %v.new, i64* %ptr unordered, align 8
+ store atomic i64 %v.new, ptr %ptr unordered, align 8
ret void
}
; Must use a full width op, not a byte op
-define void @narrow_writeback_xor(i64* %ptr) {
+define void @narrow_writeback_xor(ptr %ptr) {
; CHECK-O0-LABEL: narrow_writeback_xor:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: xorq $7, (%rdi)
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %ptr unordered, align 8
+ %v = load atomic i64, ptr %ptr unordered, align 8
%v.new = xor i64 %v, 7
- store atomic i64 %v.new, i64* %ptr unordered, align 8
+ store atomic i64 %v.new, ptr %ptr unordered, align 8
ret void
}
;; approach to incremental improvement.
; Legal if wider type is also atomic (TODO)
-define void @widen_store(i32* %p0, i32 %v1, i32 %v2) {
+define void @widen_store(ptr %p0, i32 %v1, i32 %v2) {
; CHECK-LABEL: widen_store:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: movl %edx, 4(%rdi)
; CHECK-NEXT: retq
- %p1 = getelementptr i32, i32* %p0, i64 1
- store atomic i32 %v1, i32* %p0 unordered, align 8
- store atomic i32 %v2, i32* %p1 unordered, align 4
+ %p1 = getelementptr i32, ptr %p0, i64 1
+ store atomic i32 %v1, ptr %p0 unordered, align 8
+ store atomic i32 %v2, ptr %p1 unordered, align 4
ret void
}
; This one is *NOT* legal to widen. With weaker alignment,
; the wider type might cross a cache line and violate the
; atomicity requirement.
-define void @widen_store_unaligned(i32* %p0, i32 %v1, i32 %v2) {
+define void @widen_store_unaligned(ptr %p0, i32 %v1, i32 %v2) {
; CHECK-LABEL: widen_store_unaligned:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: movl %edx, 4(%rdi)
; CHECK-NEXT: retq
- %p1 = getelementptr i32, i32* %p0, i64 1
- store atomic i32 %v1, i32* %p0 unordered, align 4
- store atomic i32 %v2, i32* %p1 unordered, align 4
+ %p1 = getelementptr i32, ptr %p0, i64 1
+ store atomic i32 %v1, ptr %p0 unordered, align 4
+ store atomic i32 %v2, ptr %p1 unordered, align 4
ret void
}
; Legal if wider type is also atomic (TODO)
-define void @widen_broadcast(i32* %p0, i32 %v) {
+define void @widen_broadcast(ptr %p0, i32 %v) {
; CHECK-LABEL: widen_broadcast:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: movl %esi, 4(%rdi)
; CHECK-NEXT: retq
- %p1 = getelementptr i32, i32* %p0, i64 1
- store atomic i32 %v, i32* %p0 unordered, align 8
- store atomic i32 %v, i32* %p1 unordered, align 4
+ %p1 = getelementptr i32, ptr %p0, i64 1
+ store atomic i32 %v, ptr %p0 unordered, align 8
+ store atomic i32 %v, ptr %p1 unordered, align 4
ret void
}
; Not legal to widen due to alignment restriction
-define void @widen_broadcast_unaligned(i32* %p0, i32 %v) {
+define void @widen_broadcast_unaligned(ptr %p0, i32 %v) {
; CHECK-LABEL: widen_broadcast_unaligned:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: movl %esi, 4(%rdi)
; CHECK-NEXT: retq
- %p1 = getelementptr i32, i32* %p0, i64 1
- store atomic i32 %v, i32* %p0 unordered, align 4
- store atomic i32 %v, i32* %p1 unordered, align 4
+ %p1 = getelementptr i32, ptr %p0, i64 1
+ store atomic i32 %v, ptr %p0 unordered, align 4
+ store atomic i32 %v, ptr %p1 unordered, align 4
ret void
}
-define i128 @load_i128(i128* %ptr) {
+define i128 @load_i128(ptr %ptr) {
; CHECK-O0-LABEL: load_i128:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: pushq %rbx
; CHECK-O3-NEXT: popq %rbx
; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
; CHECK-O3-NEXT: retq
- %v = load atomic i128, i128* %ptr unordered, align 16
+ %v = load atomic i128, ptr %ptr unordered, align 16
ret i128 %v
}
-define void @store_i128(i128* %ptr, i128 %v) {
+define void @store_i128(ptr %ptr, i128 %v) {
; CHECK-O0-LABEL: store_i128:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: pushq %rbx
; CHECK-O3-NEXT: popq %rbx
; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
; CHECK-O3-NEXT: retq
- store atomic i128 %v, i128* %ptr unordered, align 16
+ store atomic i128 %v, ptr %ptr unordered, align 16
ret void
}
-define i256 @load_i256(i256* %ptr) {
+define i256 @load_i256(ptr %ptr) {
; CHECK-O0-LABEL: load_i256:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: subq $56, %rsp
; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
; CHECK-O3-NEXT: vzeroupper
; CHECK-O3-NEXT: retq
- %v = load atomic i256, i256* %ptr unordered, align 16
+ %v = load atomic i256, ptr %ptr unordered, align 16
ret i256 %v
}
-define void @store_i256(i256* %ptr, i256 %v) {
+define void @store_i256(ptr %ptr, i256 %v) {
; CHECK-O0-LABEL: store_i256:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: subq $40, %rsp
; CHECK-O0-NEXT: .cfi_def_cfa_offset 48
-; CHECK-O0-NEXT: movq %rdx, %rax
-; CHECK-O0-NEXT: movq %rsi, (%rsp) # 8-byte Spill
+; CHECK-O0-NEXT: movq %rsi, %rax
; CHECK-O0-NEXT: movq %rdi, %rsi
-; CHECK-O0-NEXT: movq (%rsp), %rdi # 8-byte Reload
-; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; CHECK-O0-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-O0-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movq %r8, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT: movl $32, %edi
+; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; CHECK-O0-NEXT: xorl %ecx, %ecx
; CHECK-O0-NEXT: callq __atomic_store@PLT
; CHECK-O0-NEXT: addq $40, %rsp
; CHECK-O3-NEXT: addq $40, %rsp
; CHECK-O3-NEXT: .cfi_def_cfa_offset 8
; CHECK-O3-NEXT: retq
- store atomic i256 %v, i256* %ptr unordered, align 16
+ store atomic i256 %v, ptr %ptr unordered, align 16
ret void
}
; Legal if wider type is also atomic (TODO)
-define void @vec_store(i32* %p0, <2 x i32> %vec) {
+define void @vec_store(ptr %p0, <2 x i32> %vec) {
; CHECK-O0-CUR-LABEL: vec_store:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx
; CHECK-O3-EX-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
%v2 = extractelement <2 x i32> %vec, i32 1
- %p1 = getelementptr i32, i32* %p0, i64 1
- store atomic i32 %v1, i32* %p0 unordered, align 8
- store atomic i32 %v2, i32* %p1 unordered, align 4
+ %p1 = getelementptr i32, ptr %p0, i64 1
+ store atomic i32 %v1, ptr %p0 unordered, align 8
+ store atomic i32 %v2, ptr %p1 unordered, align 4
ret void
}
; Not legal to widen due to alignment restriction
-define void @vec_store_unaligned(i32* %p0, <2 x i32> %vec) {
+define void @vec_store_unaligned(ptr %p0, <2 x i32> %vec) {
; CHECK-O0-CUR-LABEL: vec_store_unaligned:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx
; CHECK-O3-EX-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
%v2 = extractelement <2 x i32> %vec, i32 1
- %p1 = getelementptr i32, i32* %p0, i64 1
- store atomic i32 %v1, i32* %p0 unordered, align 4
- store atomic i32 %v2, i32* %p1 unordered, align 4
+ %p1 = getelementptr i32, ptr %p0, i64 1
+ store atomic i32 %v1, ptr %p0 unordered, align 4
+ store atomic i32 %v2, ptr %p1 unordered, align 4
ret void
}
; Legal if wider type is also atomic (TODO)
; Also, can avoid register move from xmm to eax (TODO)
-define void @widen_broadcast2(i32* %p0, <2 x i32> %vec) {
+define void @widen_broadcast2(ptr %p0, <2 x i32> %vec) {
; CHECK-O0-CUR-LABEL: widen_broadcast2:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT: vmovss %xmm0, 4(%rdi)
; CHECK-O3-EX-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
- %p1 = getelementptr i32, i32* %p0, i64 1
- store atomic i32 %v1, i32* %p0 unordered, align 8
- store atomic i32 %v1, i32* %p1 unordered, align 4
+ %p1 = getelementptr i32, ptr %p0, i64 1
+ store atomic i32 %v1, ptr %p0 unordered, align 8
+ store atomic i32 %v1, ptr %p1 unordered, align 4
ret void
}
; Not legal to widen due to alignment restriction
-define void @widen_broadcast2_unaligned(i32* %p0, <2 x i32> %vec) {
+define void @widen_broadcast2_unaligned(ptr %p0, <2 x i32> %vec) {
; CHECK-O0-CUR-LABEL: widen_broadcast2_unaligned:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT: vmovss %xmm0, 4(%rdi)
; CHECK-O3-EX-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
- %p1 = getelementptr i32, i32* %p0, i64 1
- store atomic i32 %v1, i32* %p0 unordered, align 4
- store atomic i32 %v1, i32* %p1 unordered, align 4
+ %p1 = getelementptr i32, ptr %p0, i64 1
+ store atomic i32 %v1, ptr %p0 unordered, align 4
+ store atomic i32 %v1, ptr %p1 unordered, align 4
ret void
}
; Legal if wider type is also atomic (TODO)
-define void @widen_zero_init(i32* %p0, i32 %v1, i32 %v2) {
+define void @widen_zero_init(ptr %p0, i32 %v1, i32 %v2) {
; CHECK-LABEL: widen_zero_init:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $0, (%rdi)
; CHECK-NEXT: movl $0, 4(%rdi)
; CHECK-NEXT: retq
- %p1 = getelementptr i32, i32* %p0, i64 1
- store atomic i32 0, i32* %p0 unordered, align 8
- store atomic i32 0, i32* %p1 unordered, align 4
+ %p1 = getelementptr i32, ptr %p0, i64 1
+ store atomic i32 0, ptr %p0 unordered, align 8
+ store atomic i32 0, ptr %p1 unordered, align 4
ret void
}
; Not legal to widen due to alignment restriction
-define void @widen_zero_init_unaligned(i32* %p0, i32 %v1, i32 %v2) {
+define void @widen_zero_init_unaligned(ptr %p0, i32 %v1, i32 %v2) {
; CHECK-LABEL: widen_zero_init_unaligned:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $0, (%rdi)
; CHECK-NEXT: movl $0, 4(%rdi)
; CHECK-NEXT: retq
- %p1 = getelementptr i32, i32* %p0, i64 1
- store atomic i32 0, i32* %p0 unordered, align 4
- store atomic i32 0, i32* %p1 unordered, align 4
+ %p1 = getelementptr i32, ptr %p0, i64 1
+ store atomic i32 0, ptr %p0 unordered, align 4
+ store atomic i32 0, ptr %p1 unordered, align 4
ret void
}
;; on x86, so these are simply checking optimization quality.
; Legal, as expected
-define i64 @load_fold_add1(i64* %p) {
+define i64 @load_fold_add1(ptr %p) {
; CHECK-LABEL: load_fold_add1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: addq $15, %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = add i64 %v, 15
ret i64 %ret
}
-define i64 @load_fold_add2(i64* %p, i64 %v2) {
+define i64 @load_fold_add2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_add2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: addq (%rdi), %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = add i64 %v, %v2
ret i64 %ret
}
-define i64 @load_fold_add3(i64* %p1, i64* %p2) {
+define i64 @load_fold_add3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_add3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: addq (%rsi), %rax
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = add i64 %v, %v2
ret i64 %ret
}
; Legal, as expected
-define i64 @load_fold_sub1(i64* %p) {
+define i64 @load_fold_sub1(ptr %p) {
; CHECK-O0-LABEL: load_fold_sub1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: addq $-15, %rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = sub i64 %v, 15
ret i64 %ret
}
-define i64 @load_fold_sub2(i64* %p, i64 %v2) {
+define i64 @load_fold_sub2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_sub2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: subq %rsi, %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = sub i64 %v, %v2
ret i64 %ret
}
-define i64 @load_fold_sub3(i64* %p1, i64* %p2) {
+define i64 @load_fold_sub3(ptr %p1, ptr %p2) {
; CHECK-LABEL: load_fold_sub3:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: subq (%rsi), %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = sub i64 %v, %v2
ret i64 %ret
}
; Legal, as expected
-define i64 @load_fold_mul1(i64* %p) {
+define i64 @load_fold_mul1(ptr %p) {
; CHECK-O0-LABEL: load_fold_mul1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: imulq $15, (%rdi), %rax
; CHECK-O3-NEXT: leaq (%rax,%rax,4), %rax
; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = mul i64 %v, 15
ret i64 %ret
}
-define i64 @load_fold_mul2(i64* %p, i64 %v2) {
+define i64 @load_fold_mul2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_mul2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: imulq (%rdi), %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = mul i64 %v, %v2
ret i64 %ret
}
-define i64 @load_fold_mul3(i64* %p1, i64* %p2) {
+define i64 @load_fold_mul3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_mul3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: imulq (%rsi), %rax
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = mul i64 %v, %v2
ret i64 %ret
}
; Legal to fold (TODO)
-define i64 @load_fold_sdiv1(i64* %p) {
+define i64 @load_fold_sdiv1(ptr %p) {
; CHECK-O0-LABEL: load_fold_sdiv1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: addq %rax, %rcx
; CHECK-O3-NEXT: movq %rcx, %rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = sdiv i64 %v, 15
ret i64 %ret
}
; Legal to fold (TODO)
-define i64 @load_fold_sdiv2(i64* %p, i64 %v2) {
+define i64 @load_fold_sdiv2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_sdiv2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = sdiv i64 %v, %v2
ret i64 %ret
}
-define i64 @load_fold_sdiv3(i64* %p1, i64* %p2) {
+define i64 @load_fold_sdiv3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_sdiv3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: divl %ecx
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = sdiv i64 %v, %v2
ret i64 %ret
}
; Legal to fold (TODO)
-define i64 @load_fold_udiv1(i64* %p) {
+define i64 @load_fold_udiv1(ptr %p) {
; CHECK-O0-LABEL: load_fold_udiv1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rax
; CHECK-O3-EX-NEXT: shrq $3, %rax
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = udiv i64 %v, 15
ret i64 %ret
}
-define i64 @load_fold_udiv2(i64* %p, i64 %v2) {
+define i64 @load_fold_udiv2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_udiv2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = udiv i64 %v, %v2
ret i64 %ret
}
-define i64 @load_fold_udiv3(i64* %p1, i64* %p2) {
+define i64 @load_fold_udiv3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_udiv3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: divl %ecx
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = udiv i64 %v, %v2
ret i64 %ret
}
; Legal to fold (TODO)
-define i64 @load_fold_srem1(i64* %p) {
+define i64 @load_fold_srem1(ptr %p) {
; CHECK-O0-LABEL: load_fold_srem1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: subq %rax, %rcx
; CHECK-O3-NEXT: movq %rcx, %rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = srem i64 %v, 15
ret i64 %ret
}
; Legal, as expected
-define i64 @load_fold_srem2(i64* %p, i64 %v2) {
+define i64 @load_fold_srem2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_srem2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: movl %edx, %eax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = srem i64 %v, %v2
ret i64 %ret
}
-define i64 @load_fold_srem3(i64* %p1, i64* %p2) {
+define i64 @load_fold_srem3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_srem3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: divl %ecx
; CHECK-O3-NEXT: movl %edx, %eax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = srem i64 %v, %v2
ret i64 %ret
}
; Legal to fold (TODO)
-define i64 @load_fold_urem1(i64* %p) {
+define i64 @load_fold_urem1(ptr %p) {
; CHECK-O0-LABEL: load_fold_urem1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: leaq (%rcx,%rcx,2), %rcx
; CHECK-O3-NEXT: subq %rcx, %rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = urem i64 %v, 15
ret i64 %ret
}
; Legal, as expected
-define i64 @load_fold_urem2(i64* %p, i64 %v2) {
+define i64 @load_fold_urem2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_urem2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: divl %esi
; CHECK-O3-NEXT: movl %edx, %eax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = urem i64 %v, %v2
ret i64 %ret
}
-define i64 @load_fold_urem3(i64* %p1, i64* %p2) {
+define i64 @load_fold_urem3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_urem3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: divl %ecx
; CHECK-O3-NEXT: movl %edx, %eax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = urem i64 %v, %v2
ret i64 %ret
}
; Legal, as expected
-define i64 @load_fold_shl1(i64* %p) {
+define i64 @load_fold_shl1(ptr %p) {
; CHECK-LABEL: load_fold_shl1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: shlq $15, %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = shl i64 %v, 15
ret i64 %ret
}
-define i64 @load_fold_shl2(i64* %p, i64 %v2) {
+define i64 @load_fold_shl2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_shl2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq %rsi, %rcx
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = shl i64 %v, %v2
ret i64 %ret
}
-define i64 @load_fold_shl3(i64* %p1, i64* %p2) {
+define i64 @load_fold_shl3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_shl3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: shlxq %rax, (%rdi), %rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = shl i64 %v, %v2
ret i64 %ret
}
; Legal, as expected
-define i64 @load_fold_lshr1(i64* %p) {
+define i64 @load_fold_lshr1(ptr %p) {
; CHECK-LABEL: load_fold_lshr1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: shrq $15, %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = lshr i64 %v, 15
ret i64 %ret
}
-define i64 @load_fold_lshr2(i64* %p, i64 %v2) {
+define i64 @load_fold_lshr2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_lshr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq %rsi, %rcx
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = lshr i64 %v, %v2
ret i64 %ret
}
-define i64 @load_fold_lshr3(i64* %p1, i64* %p2) {
+define i64 @load_fold_lshr3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_lshr3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: shrxq %rax, (%rdi), %rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = lshr i64 %v, %v2
ret i64 %ret
}
; Legal, as expected
-define i64 @load_fold_ashr1(i64* %p) {
+define i64 @load_fold_ashr1(ptr %p) {
; CHECK-LABEL: load_fold_ashr1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: sarq $15, %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = ashr i64 %v, 15
ret i64 %ret
}
-define i64 @load_fold_ashr2(i64* %p, i64 %v2) {
+define i64 @load_fold_ashr2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_ashr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq %rsi, %rcx
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = ashr i64 %v, %v2
ret i64 %ret
}
-define i64 @load_fold_ashr3(i64* %p1, i64* %p2) {
+define i64 @load_fold_ashr3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_ashr3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: sarxq %rax, (%rdi), %rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = ashr i64 %v, %v2
ret i64 %ret
}
; Legal, as expected
-define i64 @load_fold_and1(i64* %p) {
+define i64 @load_fold_and1(ptr %p) {
; CHECK-O0-LABEL: load_fold_and1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: andl $15, %eax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = and i64 %v, 15
ret i64 %ret
}
-define i64 @load_fold_and2(i64* %p, i64 %v2) {
+define i64 @load_fold_and2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_and2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: andq (%rdi), %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = and i64 %v, %v2
ret i64 %ret
}
-define i64 @load_fold_and3(i64* %p1, i64* %p2) {
+define i64 @load_fold_and3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_and3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: andq (%rsi), %rax
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = and i64 %v, %v2
ret i64 %ret
}
; Legal, as expected
-define i64 @load_fold_or1(i64* %p) {
+define i64 @load_fold_or1(ptr %p) {
; CHECK-LABEL: load_fold_or1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: orq $15, %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = or i64 %v, 15
ret i64 %ret
}
-define i64 @load_fold_or2(i64* %p, i64 %v2) {
+define i64 @load_fold_or2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_or2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: orq (%rdi), %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = or i64 %v, %v2
ret i64 %ret
}
-define i64 @load_fold_or3(i64* %p1, i64* %p2) {
+define i64 @load_fold_or3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_or3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: orq (%rsi), %rax
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = or i64 %v, %v2
ret i64 %ret
}
; Legal, as expected
-define i64 @load_fold_xor1(i64* %p) {
+define i64 @load_fold_xor1(ptr %p) {
; CHECK-LABEL: load_fold_xor1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: xorq $15, %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = xor i64 %v, 15
ret i64 %ret
}
-define i64 @load_fold_xor2(i64* %p, i64 %v2) {
+define i64 @load_fold_xor2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_xor2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: xorq (%rdi), %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = xor i64 %v, %v2
ret i64 %ret
}
-define i64 @load_fold_xor3(i64* %p1, i64* %p2) {
+define i64 @load_fold_xor3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_xor3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: xorq (%rsi), %rax
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = xor i64 %v, %v2
ret i64 %ret
}
-define i1 @load_fold_icmp1(i64* %p) {
+define i1 @load_fold_icmp1(ptr %p) {
; CHECK-O0-LABEL: load_fold_icmp1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: cmpq $15, (%rdi)
; CHECK-O3-NEXT: sete %al
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = icmp eq i64 %v, 15
ret i1 %ret
}
-define i1 @load_fold_icmp2(i64* %p, i64 %v2) {
+define i1 @load_fold_icmp2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_icmp2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: cmpq %rsi, (%rdi)
; CHECK-O3-NEXT: sete %al
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = icmp eq i64 %v, %v2
ret i1 %ret
}
-define i1 @load_fold_icmp3(i64* %p1, i64* %p2) {
+define i1 @load_fold_icmp3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_icmp3:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: cmpq (%rsi), %rax
; CHECK-O3-EX-NEXT: sete %al
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i64, i64* %p1 unordered, align 8
- %v2 = load atomic i64, i64* %p2 unordered, align 8
+ %v = load atomic i64, ptr %p1 unordered, align 8
+ %v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = icmp eq i64 %v, %v2
ret i1 %ret
}
;; required not to narrow the store though!
; Legal, as expected
-define void @rmw_fold_add1(i64* %p, i64 %v) {
+define void @rmw_fold_add1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_add1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: addq $15, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = add i64 %prev, 15
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_add2(i64* %p, i64 %v) {
+define void @rmw_fold_add2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_add2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: addq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = add i64 %prev, %v
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_sub1(i64* %p, i64 %v) {
+define void @rmw_fold_sub1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_sub1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: addq $-15, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = sub i64 %prev, 15
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_sub2(i64* %p, i64 %v) {
+define void @rmw_fold_sub2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_sub2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: subq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = sub i64 %prev, %v
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_mul1(i64* %p, i64 %v) {
+define void @rmw_fold_mul1(ptr %p, i64 %v) {
; CHECK-LABEL: rmw_fold_mul1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: leaq (%rax,%rax,2), %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = mul i64 %prev, 15
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal to fold (TODO)
-define void @rmw_fold_mul2(i64* %p, i64 %v) {
+define void @rmw_fold_mul2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_mul2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: imulq (%rdi), %rsi
; CHECK-O3-NEXT: movq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = mul i64 %prev, %v
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_sdiv1(i64* %p, i64 %v) {
+define void @rmw_fold_sdiv1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_sdiv1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: addq %rax, %rdx
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = sdiv i64 %prev, 15
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_sdiv2(i64* %p, i64 %v) {
+define void @rmw_fold_sdiv2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_sdiv2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = sdiv i64 %prev, %v
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_udiv1(i64* %p, i64 %v) {
+define void @rmw_fold_udiv1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_udiv1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rdx
; CHECK-O3-EX-NEXT: shrq $3, %rax
; CHECK-O3-EX-NEXT: movq %rax, (%rdi)
; CHECK-O3-EX-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = udiv i64 %prev, 15
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_udiv2(i64* %p, i64 %v) {
+define void @rmw_fold_udiv2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_udiv2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = udiv i64 %prev, %v
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_srem1(i64* %p, i64 %v) {
+define void @rmw_fold_srem1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_srem1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: subq %rax, %rcx
; CHECK-O3-NEXT: movq %rcx, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = srem i64 %prev, 15
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_srem2(i64* %p, i64 %v) {
+define void @rmw_fold_srem2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_srem2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = srem i64 %prev, %v
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_urem1(i64* %p, i64 %v) {
+define void @rmw_fold_urem1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_urem1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: subq %rax, %rdx
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = urem i64 %prev, 15
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_urem2(i64* %p, i64 %v) {
+define void @rmw_fold_urem2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_urem2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx
; CHECK-O3-NEXT: movq %rdx, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = urem i64 %prev, %v
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal to fold (TODO)
-define void @rmw_fold_shl1(i64* %p, i64 %v) {
+define void @rmw_fold_shl1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_shl1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: shlq $15, (%rdi)
; CHECK-O3-EX-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = shl i64 %prev, 15
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal to fold (TODO)
-define void @rmw_fold_shl2(i64* %p, i64 %v) {
+define void @rmw_fold_shl2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_shl2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-O3-EX-NEXT: shlq %cl, (%rdi)
; CHECK-O3-EX-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = shl i64 %prev, %v
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal to fold (TODO)
-define void @rmw_fold_lshr1(i64* %p, i64 %v) {
+define void @rmw_fold_lshr1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_lshr1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: shrq $15, (%rdi)
; CHECK-O3-EX-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = lshr i64 %prev, 15
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal to fold (TODO)
-define void @rmw_fold_lshr2(i64* %p, i64 %v) {
+define void @rmw_fold_lshr2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_lshr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-O3-EX-NEXT: shrq %cl, (%rdi)
; CHECK-O3-EX-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = lshr i64 %prev, %v
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal to fold (TODO)
-define void @rmw_fold_ashr1(i64* %p, i64 %v) {
+define void @rmw_fold_ashr1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_ashr1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX: # %bb.0:
; CHECK-O3-EX-NEXT: sarq $15, (%rdi)
; CHECK-O3-EX-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = ashr i64 %prev, 15
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal to fold (TODO)
-define void @rmw_fold_ashr2(i64* %p, i64 %v) {
+define void @rmw_fold_ashr2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_ashr2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-O3-EX-NEXT: sarq %cl, (%rdi)
; CHECK-O3-EX-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = ashr i64 %prev, %v
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_and1(i64* %p, i64 %v) {
+define void @rmw_fold_and1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_and1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: andq $15, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = and i64 %prev, 15
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_and2(i64* %p, i64 %v) {
+define void @rmw_fold_and2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_and2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: andq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = and i64 %prev, %v
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_or1(i64* %p, i64 %v) {
+define void @rmw_fold_or1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_or1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: orq $15, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = or i64 %prev, 15
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_or2(i64* %p, i64 %v) {
+define void @rmw_fold_or2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_or2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: orq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = or i64 %prev, %v
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_xor1(i64* %p, i64 %v) {
+define void @rmw_fold_xor1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_xor1:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: xorq $15, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = xor i64 %prev, 15
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
; Legal, as expected
-define void @rmw_fold_xor2(i64* %p, i64 %v) {
+define void @rmw_fold_xor2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_xor2:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: xorq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
- %prev = load atomic i64, i64* %p unordered, align 8
+ %prev = load atomic i64, ptr %p unordered, align 8
%val = xor i64 %prev, %v
- store atomic i64 %val, i64* %p unordered, align 8
+ store atomic i64 %val, ptr %p unordered, align 8
ret void
}
;; be folded against the memory operation.
; Legal to reduce the load width (TODO)
-define i32 @fold_trunc(i64* %p) {
+define i32 @fold_trunc(ptr %p) {
; CHECK-LABEL: fold_trunc:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%ret = trunc i64 %v to i32
ret i32 %ret
}
; Legal to reduce the load width and fold the load (TODO)
-define i32 @fold_trunc_add(i64* %p, i32 %v2) {
+define i32 @fold_trunc_add(ptr %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_add:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: addl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%trunc = trunc i64 %v to i32
%ret = add i32 %trunc, %v2
ret i32 %ret
}
; Legal to reduce the load width and fold the load (TODO)
-define i32 @fold_trunc_and(i64* %p, i32 %v2) {
+define i32 @fold_trunc_and(ptr %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_and:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: andl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%trunc = trunc i64 %v to i32
%ret = and i32 %trunc, %v2
ret i32 %ret
}
; Legal to reduce the load width and fold the load (TODO)
-define i32 @fold_trunc_or(i64* %p, i32 %v2) {
+define i32 @fold_trunc_or(ptr %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_or:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: orl %esi, %eax
; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%trunc = trunc i64 %v to i32
%ret = or i32 %trunc, %v2
ret i32 %ret
; It's tempting to split the wide load into two smaller byte loads
; to reduce memory traffic, but this would be illegal for an atomic load
-define i32 @split_load(i64* %p) {
+define i32 @split_load(ptr %p) {
; CHECK-O0-LABEL: split_load:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: orl %eax, %ecx
; CHECK-O3-NEXT: movzbl %cl, %eax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
%b1 = trunc i64 %v to i8
%v.shift = lshr i64 %v, 32
%b2 = trunc i64 %v.shift to i8
@Zero = constant i64 0
; TODO: should return constant
-define i64 @constant_folding(i64* %p) {
+define i64 @constant_folding(ptr %p) {
; CHECK-LABEL: constant_folding:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
ret i64 %v
}
; Legal to forward and fold (TODO)
-define i64 @load_forwarding(i64* %p) {
+define i64 @load_forwarding(ptr %p) {
; CHECK-LABEL: load_forwarding:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: orq (%rdi), %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
- %v2 = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
+ %v2 = load atomic i64, ptr %p unordered, align 8
%ret = or i64 %v, %v2
ret i64 %ret
}
; Legal to forward (TODO)
-define i64 @store_forward(i64* %p, i64 %v) {
+define i64 @store_forward(ptr %p, i64 %v) {
; CHECK-LABEL: store_forward:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, (%rdi)
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: retq
- store atomic i64 %v, i64* %p unordered, align 8
- %ret = load atomic i64, i64* %p unordered, align 8
+ store atomic i64 %v, ptr %p unordered, align 8
+ %ret = load atomic i64, ptr %p unordered, align 8
ret i64 %ret
}
; Legal to kill (TODO)
-define void @dead_writeback(i64* %p) {
+define void @dead_writeback(ptr %p) {
; CHECK-LABEL: dead_writeback:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
- store atomic i64 %v, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
+ store atomic i64 %v, ptr %p unordered, align 8
ret void
}
; Legal to kill (TODO)
-define void @dead_store(i64* %p, i64 %v) {
+define void @dead_store(ptr %p, i64 %v) {
; CHECK-LABEL: dead_store:
; CHECK: # %bb.0:
; CHECK-NEXT: movq $0, (%rdi)
; CHECK-NEXT: movq %rsi, (%rdi)
; CHECK-NEXT: retq
- store atomic i64 0, i64* %p unordered, align 8
- store atomic i64 %v, i64* %p unordered, align 8
+ store atomic i64 0, ptr %p unordered, align 8
+ store atomic i64 %v, ptr %p unordered, align 8
ret void
}
;; If that were to happen, please rewrite the test to ensure load movement
;; isn't violated.
-define i64 @nofold_fence(i64* %p) {
+define i64 @nofold_fence(ptr %p) {
; CHECK-LABEL: nofold_fence:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: mfence
; CHECK-NEXT: addq $15, %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
fence seq_cst
%ret = add i64 %v, 15
ret i64 %ret
}
-define i64 @nofold_fence_acquire(i64* %p) {
+define i64 @nofold_fence_acquire(ptr %p) {
; CHECK-LABEL: nofold_fence_acquire:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: #MEMBARRIER
; CHECK-NEXT: addq $15, %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
fence acquire
%ret = add i64 %v, 15
ret i64 %ret
}
-define i64 @nofold_stfence(i64* %p) {
+define i64 @nofold_stfence(ptr %p) {
; CHECK-LABEL: nofold_stfence:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: #MEMBARRIER
; CHECK-NEXT: addq $15, %rax
; CHECK-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8
+ %v = load atomic i64, ptr %p unordered, align 8
fence syncscope("singlethread") seq_cst
%ret = add i64 %v, 15
ret i64 %ret
; CHECK-O3-NEXT: movq %rdi, %rax
; CHECK-O3-NEXT: addq Constant(%rip), %rax
; CHECK-O3-NEXT: retq
- %v = load atomic i64, i64* @Constant unordered, align 8
+ %v = load atomic i64, ptr @Constant unordered, align 8
%ret = add i64 %v, %arg
ret i64 %ret
}
-define i64 @fold_constant_clobber(i64* %p, i64 %arg) {
+define i64 @fold_constant_clobber(ptr %p, i64 %arg) {
; CHECK-O0-LABEL: fold_constant_clobber:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq Constant(%rip), %rax
; CHECK-O3-EX-NEXT: addq Constant(%rip), %rax
; CHECK-O3-EX-NEXT: movq $5, (%rdi)
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i64, i64* @Constant unordered, align 8
- store i64 5, i64* %p
+ %v = load atomic i64, ptr @Constant unordered, align 8
+ store i64 5, ptr %p
%ret = add i64 %v, %arg
ret i64 %ret
}
; CHECK-O3-EX-NEXT: addq Constant(%rip), %rax
; CHECK-O3-EX-NEXT: mfence
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i64, i64* @Constant unordered, align 8
+ %v = load atomic i64, ptr @Constant unordered, align 8
fence seq_cst
%ret = add i64 %v, %arg
ret i64 %ret
}
-define i64 @fold_invariant_clobber(i64* dereferenceable(8) %p, i64 %arg) {
+define i64 @fold_invariant_clobber(ptr dereferenceable(8) %p, i64 %arg) {
; CHECK-O0-LABEL: fold_invariant_clobber:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: addq (%rdi), %rax
; CHECK-O3-EX-NEXT: movq $5, (%rdi)
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{}
- store i64 5, i64* %p
+ %v = load atomic i64, ptr %p unordered, align 8, !invariant.load !{}
+ store i64 5, ptr %p
%ret = add i64 %v, %arg
ret i64 %ret
}
-define i64 @fold_invariant_fence(i64* dereferenceable(8) %p, i64 %arg) {
+define i64 @fold_invariant_fence(ptr dereferenceable(8) %p, i64 %arg) {
; CHECK-O0-LABEL: fold_invariant_fence:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O3-EX-NEXT: addq (%rdi), %rax
; CHECK-O3-EX-NEXT: mfence
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{}
+ %v = load atomic i64, ptr %p unordered, align 8, !invariant.load !{}
fence seq_cst
%ret = add i64 %v, %arg
ret i64 %ret
; Exercise a few cases involving any-extend (anyext) idioms
-define i16 @load_i8_anyext_i16(i8* %ptr) {
+define i16 @load_i8_anyext_i16(ptr %ptr) {
; CHECK-O0-CUR-LABEL: load_i8_anyext_i16:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: movb (%rdi), %al
; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i8, i8* %ptr unordered, align 2
+ %v = load atomic i8, ptr %ptr unordered, align 2
%vec = insertelement <2 x i8> undef, i8 %v, i32 0
%res = bitcast <2 x i8> %vec to i16
ret i16 %res
}
-define i32 @load_i8_anyext_i32(i8* %ptr) {
+define i32 @load_i8_anyext_i32(ptr %ptr) {
; CHECK-O0-CUR-LABEL: load_i8_anyext_i32:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: movb (%rdi), %al
; CHECK-O3-EX-NEXT: vpbroadcastb (%rdi), %xmm0
; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i8, i8* %ptr unordered, align 4
+ %v = load atomic i8, ptr %ptr unordered, align 4
%vec = insertelement <4 x i8> undef, i8 %v, i32 0
%res = bitcast <4 x i8> %vec to i32
ret i32 %res
}
-define i32 @load_i16_anyext_i32(i16* %ptr) {
+define i32 @load_i16_anyext_i32(ptr %ptr) {
; CHECK-O0-CUR-LABEL: load_i16_anyext_i32:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: movw (%rdi), %cx
; CHECK-O3-EX-NEXT: vpbroadcastw (%rdi), %xmm0
; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i16, i16* %ptr unordered, align 4
+ %v = load atomic i16, ptr %ptr unordered, align 4
%vec = insertelement <2 x i16> undef, i16 %v, i64 0
%res = bitcast <2 x i16> %vec to i32
ret i32 %res
}
-define i64 @load_i16_anyext_i64(i16* %ptr) {
+define i64 @load_i16_anyext_i64(ptr %ptr) {
; CHECK-O0-CUR-LABEL: load_i16_anyext_i64:
; CHECK-O0-CUR: # %bb.0:
; CHECK-O0-CUR-NEXT: movw (%rdi), %cx
; CHECK-O3-EX-NEXT: vpbroadcastw (%rdi), %xmm0
; CHECK-O3-EX-NEXT: vmovq %xmm0, %rax
; CHECK-O3-EX-NEXT: retq
- %v = load atomic i16, i16* %ptr unordered, align 8
+ %v = load atomic i16, ptr %ptr unordered, align 8
%vec = insertelement <4 x i16> undef, i16 %v, i64 0
%res = bitcast <4 x i16> %vec to i64
ret i64 %res
}
; TODO: It would be legal to combine the adjacent loads below into a single wider
; load when the wider atomic type is legal; see the illustrative sketch after this
; function.
-define i16 @load_combine(i8* %p) {
+define i16 @load_combine(ptr %p) {
; CHECK-O0-LABEL: load_combine:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movb (%rdi), %al
; CHECK-O3-NEXT: orl %ecx, %eax
; CHECK-O3-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O3-NEXT: retq
- %v1 = load atomic i8, i8* %p unordered, align 2
- %p2 = getelementptr i8, i8* %p, i64 1
- %v2 = load atomic i8, i8* %p2 unordered, align 1
+ %v1 = load atomic i8, ptr %p unordered, align 2
+ %p2 = getelementptr i8, ptr %p, i64 1
+ %v2 = load atomic i8, ptr %p2 unordered, align 1
%v1.ext = zext i8 %v1 to i16
%v2.ext = zext i8 %v2 to i16
%v2.sht = shl i16 %v2.ext, 8
  %res = or i16 %v1.ext, %v2.sht
  ret i16 %res
}
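; Illustrative sketch only (not exercised by the RUN lines above): on a
; little-endian target such as x86-64, where a 2-byte-aligned unordered atomic
; i16 load is legal, the two adjacent i8 loads in @load_combine could in
; principle be folded into a single wider load, e.g.:
;
;   define i16 @load_combine_folded(ptr %p) {
;     %wide = load atomic i16, ptr %p unordered, align 2
;     ret i16 %wide
;   }
;
; The @load_combine_folded name is purely hypothetical; this is the combine the
; TODO above refers to.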
-define i1 @fold_cmp_over_fence(i32* %p, i32 %v1) {
+define i1 @fold_cmp_over_fence(ptr %p, i32 %v1) {
; CHECK-O0-LABEL: fold_cmp_over_fence:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movl (%rdi), %eax
; CHECK-O3-EX-NEXT: .LBB116_2: # %untaken
; CHECK-O3-EX-NEXT: xorl %eax, %eax
; CHECK-O3-EX-NEXT: retq
- %v2 = load atomic i32, i32* %p unordered, align 4
+ %v2 = load atomic i32, ptr %p unordered, align 4
fence seq_cst
%cmp = icmp eq i32 %v1, %v2
br i1 %cmp, label %taken, label %untaken
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -opaque-pointers=0 < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
%struct.i = type { i32, i24 }
%struct.m = type { %struct.i }
@x2 = local_unnamed_addr global i32 0, align 4
@x3 = local_unnamed_addr global i32 0, align 4
@x4 = local_unnamed_addr global i32 0, align 4
-@x5 = local_unnamed_addr global double* null, align 8
+@x5 = local_unnamed_addr global ptr null, align 8
; Check that the compiler does not crash.
; Test for PR30775
; CHECK-NEXT: movw %si, (%rax)
; CHECK-NEXT: retq
entry:
- %bf.load = load i32, i32* bitcast (i24* getelementptr inbounds (%struct.m, %struct.m* @k, i64 0, i32 0, i32 1) to i32*), align 4
- %0 = load i16, i16* @c, align 2
+ %bf.load = load i32, ptr getelementptr inbounds (%struct.m, ptr @k, i64 0, i32 0, i32 1), align 4
+ %0 = load i16, ptr @c, align 2
%conv = sext i16 %0 to i32
- %1 = load i16, i16* @b, align 2
+ %1 = load i16, ptr @b, align 2
%conv1 = sext i16 %1 to i32
- %2 = load i32, i32* @a, align 4
+ %2 = load i32, ptr @a, align 4
%tobool = icmp ne i32 %2, 0
- %bf.load3 = load i32, i32* getelementptr inbounds (%struct.i, %struct.i* @l, i64 0, i32 0), align 4
+ %bf.load3 = load i32, ptr @l, align 4
%bf.shl = shl i32 %bf.load3, 7
%bf.ashr = ashr exact i32 %bf.shl, 7
%bf.clear = shl i32 %bf.load, 1
%phitmp = icmp eq i32 %bf.ashr, 0
%.phitmp = or i1 %phitmp, %tobool29
%conv37 = zext i1 %.phitmp to i16
- store i16 %conv37, i16* @e, align 2
+ store i16 %conv37, ptr @e, align 2
%bf.clear39 = and i32 %bf.load, 65535
%factor53 = shl nuw nsw i32 %bf.clear39, 1
%add46 = add nsw i32 %factor53, %conv
%add51 = add nuw nsw i32 %add48.lobit.not, %bf.clear39
%shr = ashr i32 %2, %add51
%conv52 = trunc i32 %shr to i16
- store i16 %conv52, i16* @b, align 2
+ store i16 %conv52, ptr @b, align 2
ret void
}
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
-; CHECK-NEXT: movl (%rax), %ebx
-; CHECK-NEXT: andl $511, %ebx # imm = 0x1FF
-; CHECK-NEXT: leaq 1(%rbx), %rax
+; CHECK-NEXT: movl (%rax), %esi
+; CHECK-NEXT: andl $511, %esi # imm = 0x1FF
+; CHECK-NEXT: leaq 1(%rsi), %rax
; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
; CHECK-NEXT: movl %eax, (%rcx)
; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
-; CHECK-NEXT: movl (%rcx), %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: movl (%rcx), %edx
+; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: je .LBB1_18
; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
-; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rdx
-; CHECK-NEXT: movq (%rdx), %rsi
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: notl %edx
-; CHECK-NEXT: leaq 8(,%rdx,8), %rdi
-; CHECK-NEXT: imulq %rax, %rdi
-; CHECK-NEXT: addq %rsi, %rdi
-; CHECK-NEXT: movq x2@GOTPCREL(%rip), %r8
-; CHECK-NEXT: movl (%r8), %edx
-; CHECK-NEXT: leal 8(,%rbx,8), %eax
-; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: leaq 8(%rsi), %rax
-; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: leaq 32(%rsi), %r11
-; CHECK-NEXT: leaq 8(,%rbx,8), %rbx
-; CHECK-NEXT: xorl %r14d, %r14d
-; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r15
-; CHECK-NEXT: movq %rsi, %r12
+; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rcx
+; CHECK-NEXT: movq (%rcx), %rdi
+; CHECK-NEXT: movl %edx, %ecx
+; CHECK-NEXT: notl %ecx
+; CHECK-NEXT: leaq 8(,%rcx,8), %rcx
+; CHECK-NEXT: imulq %rax, %rcx
+; CHECK-NEXT: addq %rdi, %rcx
+; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq x2@GOTPCREL(%rip), %r9
+; CHECK-NEXT: movl (%r9), %ecx
+; CHECK-NEXT: leal 8(,%rsi,8), %r8d
+; CHECK-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: leaq 8(%rdi), %r8
+; CHECK-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: leaq 32(%rdi), %rbx
+; CHECK-NEXT: leaq 8(,%rsi,8), %r14
+; CHECK-NEXT: xorl %r15d, %r15d
+; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r12
+; CHECK-NEXT: movq %rdi, %r13
; CHECK-NEXT: jmp .LBB1_2
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_15: # %for.cond1.for.inc3_crit_edge
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movl %edx, (%r8)
+; CHECK-NEXT: movl %ecx, (%r9)
; CHECK-NEXT: .LBB1_16: # %for.inc3
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: addq %rbx, %r12
-; CHECK-NEXT: incq %r14
-; CHECK-NEXT: addq %rbx, %r11
-; CHECK-NEXT: incl %ecx
+; CHECK-NEXT: incq %r15
+; CHECK-NEXT: addq %r14, %rbx
+; CHECK-NEXT: incl %edx
+; CHECK-NEXT: leaq (%r13,%rax,8), %r13
; CHECK-NEXT: je .LBB1_17
; CHECK-NEXT: .LBB1_2: # %for.cond1thread-pre-split
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB1_12 Depth 2
; CHECK-NEXT: # Child Loop BB1_14 Depth 2
-; CHECK-NEXT: testl %edx, %edx
+; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: jns .LBB1_16
; CHECK-NEXT: # %bb.3: # %for.body2.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movslq %edx, %r13
-; CHECK-NEXT: testq %r13, %r13
-; CHECK-NEXT: movq $-1, %rbp
-; CHECK-NEXT: cmovnsq %r13, %rbp
-; CHECK-NEXT: subq %r13, %rbp
-; CHECK-NEXT: incq %rbp
-; CHECK-NEXT: cmpq $4, %rbp
+; CHECK-NEXT: movslq %ecx, %rbp
+; CHECK-NEXT: testq %rbp, %rbp
+; CHECK-NEXT: movq $-1, %rsi
+; CHECK-NEXT: cmovnsq %rbp, %rsi
+; CHECK-NEXT: subq %rbp, %rsi
+; CHECK-NEXT: incq %rsi
+; CHECK-NEXT: cmpq $4, %rsi
; CHECK-NEXT: jb .LBB1_14
; CHECK-NEXT: # %bb.4: # %min.iters.checked
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movq %rbp, %rdx
-; CHECK-NEXT: andq $-4, %rdx
+; CHECK-NEXT: movq %rsi, %rcx
+; CHECK-NEXT: andq $-4, %rcx
; CHECK-NEXT: je .LBB1_14
; CHECK-NEXT: # %bb.5: # %vector.memcheck
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; CHECK-NEXT: imulq %r14, %rax
-; CHECK-NEXT: leaq (%rsi,%rax), %r10
-; CHECK-NEXT: leaq (%r10,%r13,8), %r9
-; CHECK-NEXT: testq %r13, %r13
-; CHECK-NEXT: movq $-1, %r10
-; CHECK-NEXT: cmovnsq %r13, %r10
-; CHECK-NEXT: cmpq %r15, %r9
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; CHECK-NEXT: imulq %r15, %r10
+; CHECK-NEXT: leaq (%rdi,%r10), %r11
+; CHECK-NEXT: leaq (%r11,%rbp,8), %r8
+; CHECK-NEXT: testq %rbp, %rbp
+; CHECK-NEXT: movq $-1, %r11
+; CHECK-NEXT: cmovnsq %rbp, %r11
+; CHECK-NEXT: cmpq %r12, %r8
; CHECK-NEXT: jae .LBB1_7
; CHECK-NEXT: # %bb.6: # %vector.memcheck
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
-; CHECK-NEXT: leaq (%rax,%r10,8), %rax
-; CHECK-NEXT: cmpq %r15, %rax
+; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
+; CHECK-NEXT: leaq (%r10,%r11,8), %r8
+; CHECK-NEXT: cmpq %r12, %r8
; CHECK-NEXT: ja .LBB1_14
; CHECK-NEXT: .LBB1_7: # %vector.body.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: leaq -4(%rdx), %r9
-; CHECK-NEXT: movq %r9, %rax
-; CHECK-NEXT: shrq $2, %rax
-; CHECK-NEXT: btl $2, %r9d
+; CHECK-NEXT: leaq -4(%rcx), %r8
+; CHECK-NEXT: movq %r8, %r11
+; CHECK-NEXT: shrq $2, %r11
+; CHECK-NEXT: btl $2, %r8d
; CHECK-NEXT: jb .LBB1_8
; CHECK-NEXT: # %bb.9: # %vector.body.prol.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; CHECK-NEXT: movdqu %xmm0, (%r12,%r13,8)
-; CHECK-NEXT: movdqu %xmm0, 16(%r12,%r13,8)
+; CHECK-NEXT: movdqu %xmm0, (%r13,%rbp,8)
+; CHECK-NEXT: movdqu %xmm0, 16(%r13,%rbp,8)
; CHECK-NEXT: movl $4, %r10d
-; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: testq %r11, %r11
; CHECK-NEXT: jne .LBB1_11
; CHECK-NEXT: jmp .LBB1_13
; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: xorl %r10d, %r10d
-; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: testq %r11, %r11
; CHECK-NEXT: je .LBB1_13
; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; CHECK-NEXT: movq %r10, %rax
-; CHECK-NEXT: subq %rdx, %rax
-; CHECK-NEXT: addq %r13, %r10
-; CHECK-NEXT: leaq (%r11,%r10,8), %r10
+; CHECK-NEXT: movq %r10, %r11
+; CHECK-NEXT: subq %rcx, %r11
+; CHECK-NEXT: addq %rbp, %r10
+; CHECK-NEXT: leaq (%rbx,%r10,8), %r10
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_12: # %vector.body
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: movdqu %xmm0, (%r10)
; CHECK-NEXT: movdqu %xmm0, 16(%r10)
; CHECK-NEXT: addq $64, %r10
-; CHECK-NEXT: addq $8, %rax
+; CHECK-NEXT: addq $8, %r11
; CHECK-NEXT: jne .LBB1_12
; CHECK-NEXT: .LBB1_13: # %middle.block
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: addq %rdx, %r13
-; CHECK-NEXT: cmpq %rdx, %rbp
-; CHECK-NEXT: movq %r13, %rdx
+; CHECK-NEXT: addq %rcx, %rbp
+; CHECK-NEXT: cmpq %rcx, %rsi
+; CHECK-NEXT: movq %rbp, %rcx
; CHECK-NEXT: je .LBB1_15
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_14: # %for.body2
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: movq (%r15), %rax
-; CHECK-NEXT: movq %rax, (%r12,%r13,8)
-; CHECK-NEXT: leaq 1(%r13), %rdx
-; CHECK-NEXT: cmpq $-1, %r13
-; CHECK-NEXT: movq %rdx, %r13
+; CHECK-NEXT: movq (%r12), %rcx
+; CHECK-NEXT: movq %rcx, (%r13,%rbp,8)
+; CHECK-NEXT: leaq 1(%rbp), %rcx
+; CHECK-NEXT: cmpq $-1, %rbp
+; CHECK-NEXT: movq %rcx, %rbp
; CHECK-NEXT: jl .LBB1_14
; CHECK-NEXT: jmp .LBB1_15
; CHECK-NEXT: .LBB1_17: # %for.cond.for.end5_crit_edge
; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rax
-; CHECK-NEXT: movq %rdi, (%rax)
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK-NEXT: movq %rcx, (%rax)
; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rax
; CHECK-NEXT: movl $0, (%rax)
; CHECK-NEXT: .LBB1_18: # %for.end5
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
- %0 = load i32, i32* @x1, align 4
+ %0 = load i32, ptr @x1, align 4
%and = and i32 %0, 511
%add = add nuw nsw i32 %and, 1
- store i32 %add, i32* @x4, align 4
- %.pr = load i32, i32* @x3, align 4
+ store i32 %add, ptr @x4, align 4
+ %.pr = load i32, ptr @x3, align 4
%tobool8 = icmp eq i32 %.pr, 0
br i1 %tobool8, label %for.end5, label %for.cond1thread-pre-split.lr.ph
for.cond1thread-pre-split.lr.ph: ; preds = %entry
%idx.ext13 = zext i32 %add to i64
- %x5.promoted = load double*, double** @x5, align 8
- %x5.promoted9 = bitcast double* %x5.promoted to i8*
+ %x5.promoted = load ptr, ptr @x5, align 8
%1 = xor i32 %.pr, -1
%2 = zext i32 %1 to i64
%3 = shl nuw nsw i64 %2, 3
%4 = add nuw nsw i64 %3, 8
%5 = mul nuw nsw i64 %4, %idx.ext13
- %uglygep = getelementptr i8, i8* %x5.promoted9, i64 %5
- %.pr6.pre = load i32, i32* @x2, align 4
+ %uglygep = getelementptr i8, ptr %x5.promoted, i64 %5
+ %.pr6.pre = load i32, ptr @x2, align 4
%6 = shl nuw nsw i32 %and, 3
%addconv = add nuw nsw i32 %6, 8
%7 = zext i32 %addconv to i64
- %scevgep15 = getelementptr double, double* %x5.promoted, i64 1
- %scevgep1516 = bitcast double* %scevgep15 to i8*
+ %scevgep15 = getelementptr double, ptr %x5.promoted, i64 1
br label %for.cond1thread-pre-split
for.cond1thread-pre-split: ; preds = %for.cond1thread-pre-split.lr.ph, %for.inc3
%indvar = phi i64 [ 0, %for.cond1thread-pre-split.lr.ph ], [ %indvar.next, %for.inc3 ]
%.pr6 = phi i32 [ %.pr6.pre, %for.cond1thread-pre-split.lr.ph ], [ %.pr611, %for.inc3 ]
- %8 = phi double* [ %x5.promoted, %for.cond1thread-pre-split.lr.ph ], [ %add.ptr, %for.inc3 ]
+ %8 = phi ptr [ %x5.promoted, %for.cond1thread-pre-split.lr.ph ], [ %add.ptr, %for.inc3 ]
%9 = phi i32 [ %.pr, %for.cond1thread-pre-split.lr.ph ], [ %inc4, %for.inc3 ]
%10 = mul i64 %7, %indvar
- %uglygep14 = getelementptr i8, i8* %x5.promoted9, i64 %10
- %uglygep17 = getelementptr i8, i8* %scevgep1516, i64 %10
+ %uglygep14 = getelementptr i8, ptr %x5.promoted, i64 %10
+ %uglygep17 = getelementptr i8, ptr %scevgep15, i64 %10
%cmp7 = icmp slt i32 %.pr6, 0
br i1 %cmp7, label %for.body2.preheader, label %for.inc3
vector.memcheck: ; preds = %min.iters.checked
%16 = shl nsw i64 %11, 3
- %scevgep = getelementptr i8, i8* %uglygep14, i64 %16
+ %scevgep = getelementptr i8, ptr %uglygep14, i64 %16
%17 = icmp sgt i64 %11, -1
%smax18 = select i1 %17, i64 %11, i64 -1
%18 = shl nsw i64 %smax18, 3
- %scevgep19 = getelementptr i8, i8* %uglygep17, i64 %18
- %bound0 = icmp ult i8* %scevgep, bitcast (double* @x0 to i8*)
- %bound1 = icmp ugt i8* %scevgep19, bitcast (double* @x0 to i8*)
+ %scevgep19 = getelementptr i8, ptr %uglygep17, i64 %18
+ %bound0 = icmp ult ptr %scevgep, @x0
+ %bound1 = icmp ugt ptr %scevgep19, @x0
%memcheck.conflict = and i1 %bound0, %bound1
%ind.end = add nsw i64 %11, %n.vec
br i1 %memcheck.conflict, label %for.body2.preheader21, label %vector.body.preheader
br label %vector.body.prol
vector.body.prol: ; preds = %vector.body.prol.preheader
- %22 = load i64, i64* bitcast (double* @x0 to i64*), align 8
+ %22 = load i64, ptr @x0, align 8
%23 = insertelement <2 x i64> undef, i64 %22, i32 0
%24 = shufflevector <2 x i64> %23, <2 x i64> undef, <2 x i32> zeroinitializer
%25 = insertelement <2 x i64> undef, i64 %22, i32 0
%26 = shufflevector <2 x i64> %25, <2 x i64> undef, <2 x i32> zeroinitializer
- %27 = getelementptr inbounds double, double* %8, i64 %11
- %28 = bitcast double* %27 to <2 x i64>*
- store <2 x i64> %24, <2 x i64>* %28, align 8
- %29 = getelementptr double, double* %27, i64 2
- %30 = bitcast double* %29 to <2 x i64>*
- store <2 x i64> %26, <2 x i64>* %30, align 8
+ %27 = getelementptr inbounds double, ptr %8, i64 %11
+ store <2 x i64> %24, ptr %27, align 8
+ %28 = getelementptr double, ptr %27, i64 2
+ store <2 x i64> %26, ptr %28, align 8
br label %vector.body.prol.loopexit.unr-lcssa
vector.body.prol.loopexit.unr-lcssa: ; preds = %vector.body.preheader, %vector.body.prol
br label %vector.body.prol.loopexit
vector.body.prol.loopexit: ; preds = %vector.body.prol.loopexit.unr-lcssa
- %31 = icmp eq i64 %20, 0
- br i1 %31, label %middle.block, label %vector.body.preheader.new
+ %29 = icmp eq i64 %20, 0
+ br i1 %29, label %middle.block, label %vector.body.preheader.new
vector.body.preheader.new: ; preds = %vector.body.prol.loopexit
- %32 = load i64, i64* bitcast (double* @x0 to i64*), align 8
- %33 = insertelement <2 x i64> undef, i64 %32, i32 0
+ %30 = load i64, ptr @x0, align 8
+ %31 = insertelement <2 x i64> undef, i64 %30, i32 0
+ %32 = shufflevector <2 x i64> %31, <2 x i64> undef, <2 x i32> zeroinitializer
+ %33 = insertelement <2 x i64> undef, i64 %30, i32 0
%34 = shufflevector <2 x i64> %33, <2 x i64> undef, <2 x i32> zeroinitializer
- %35 = insertelement <2 x i64> undef, i64 %32, i32 0
- %36 = shufflevector <2 x i64> %35, <2 x i64> undef, <2 x i32> zeroinitializer
- %37 = load i64, i64* bitcast (double* @x0 to i64*), align 8
- %38 = insertelement <2 x i64> undef, i64 %37, i32 0
+ %35 = load i64, ptr @x0, align 8
+ %36 = insertelement <2 x i64> undef, i64 %35, i32 0
+ %37 = shufflevector <2 x i64> %36, <2 x i64> undef, <2 x i32> zeroinitializer
+ %38 = insertelement <2 x i64> undef, i64 %35, i32 0
%39 = shufflevector <2 x i64> %38, <2 x i64> undef, <2 x i32> zeroinitializer
- %40 = insertelement <2 x i64> undef, i64 %37, i32 0
- %41 = shufflevector <2 x i64> %40, <2 x i64> undef, <2 x i32> zeroinitializer
br label %vector.body
vector.body: ; preds = %vector.body, %vector.body.preheader.new
%index = phi i64 [ %index.unr.ph, %vector.body.preheader.new ], [ %index.next.1, %vector.body ]
- %42 = add i64 %11, %index
- %43 = getelementptr inbounds double, double* %8, i64 %42
- %44 = bitcast double* %43 to <2 x i64>*
- store <2 x i64> %34, <2 x i64>* %44, align 8
- %45 = getelementptr double, double* %43, i64 2
- %46 = bitcast double* %45 to <2 x i64>*
- store <2 x i64> %36, <2 x i64>* %46, align 8
+ %40 = add i64 %11, %index
+ %41 = getelementptr inbounds double, ptr %8, i64 %40
+ store <2 x i64> %32, ptr %41, align 8
+ %42 = getelementptr double, ptr %41, i64 2
+ store <2 x i64> %34, ptr %42, align 8
%index.next = add i64 %index, 4
- %47 = add i64 %11, %index.next
- %48 = getelementptr inbounds double, double* %8, i64 %47
- %49 = bitcast double* %48 to <2 x i64>*
- store <2 x i64> %39, <2 x i64>* %49, align 8
- %50 = getelementptr double, double* %48, i64 2
- %51 = bitcast double* %50 to <2 x i64>*
- store <2 x i64> %41, <2 x i64>* %51, align 8
+ %43 = add i64 %11, %index.next
+ %44 = getelementptr inbounds double, ptr %8, i64 %43
+ store <2 x i64> %37, ptr %44, align 8
+ %45 = getelementptr double, ptr %44, i64 2
+ store <2 x i64> %39, ptr %45, align 8
%index.next.1 = add i64 %index, 8
- %52 = icmp eq i64 %index.next.1, %n.vec
- br i1 %52, label %middle.block.unr-lcssa, label %vector.body
+ %46 = icmp eq i64 %index.next.1, %n.vec
+ br i1 %46, label %middle.block.unr-lcssa, label %vector.body
middle.block.unr-lcssa: ; preds = %vector.body
br label %middle.block
for.body2: ; preds = %for.body2.preheader21, %for.body2
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body2 ], [ %indvars.iv.ph, %for.body2.preheader21 ]
- %53 = load i64, i64* bitcast (double* @x0 to i64*), align 8
- %arrayidx = getelementptr inbounds double, double* %8, i64 %indvars.iv
- %54 = bitcast double* %arrayidx to i64*
- store i64 %53, i64* %54, align 8
+ %47 = load i64, ptr @x0, align 8
+ %arrayidx = getelementptr inbounds double, ptr %8, i64 %indvars.iv
+ store i64 %47, ptr %arrayidx, align 8
%indvars.iv.next = add nsw i64 %indvars.iv, 1
%cmp = icmp slt i64 %indvars.iv, -1
br i1 %cmp, label %for.body2, label %for.cond1.for.inc3_crit_edge.loopexit
for.cond1.for.inc3_crit_edge: ; preds = %for.cond1.for.inc3_crit_edge.loopexit, %middle.block
%indvars.iv.next.lcssa = phi i64 [ %ind.end, %middle.block ], [ %indvars.iv.next, %for.cond1.for.inc3_crit_edge.loopexit ]
- %55 = trunc i64 %indvars.iv.next.lcssa to i32
- store i32 %55, i32* @x2, align 4
+ %48 = trunc i64 %indvars.iv.next.lcssa to i32
+ store i32 %48, ptr @x2, align 4
br label %for.inc3
for.inc3: ; preds = %for.cond1.for.inc3_crit_edge, %for.cond1thread-pre-split
- %.pr611 = phi i32 [ %55, %for.cond1.for.inc3_crit_edge ], [ %.pr6, %for.cond1thread-pre-split ]
+ %.pr611 = phi i32 [ %48, %for.cond1.for.inc3_crit_edge ], [ %.pr6, %for.cond1thread-pre-split ]
%inc4 = add nsw i32 %9, 1
- %add.ptr = getelementptr inbounds double, double* %8, i64 %idx.ext13
+ %add.ptr = getelementptr inbounds double, ptr %8, i64 %idx.ext13
%tobool = icmp eq i32 %inc4, 0
%indvar.next = add i64 %indvar, 1
br i1 %tobool, label %for.cond.for.end5_crit_edge, label %for.cond1thread-pre-split
for.cond.for.end5_crit_edge: ; preds = %for.inc3
- store i8* %uglygep, i8** bitcast (double** @x5 to i8**), align 8
- store i32 0, i32* @x3, align 4
+ store ptr %uglygep, ptr @x5, align 8
+ store i32 0, ptr @x3, align 4
br label %for.end5
for.end5: ; preds = %for.cond.for.end5_crit_edge, %entry