; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone | FileCheck %s -check-prefixes=CHECK,NOOUTLINE
-; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -mattr=+outline-atomics | FileCheck %s -check-prefixes=CHECK,OUTLINE
-; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -mattr=+lse | FileCheck %s -check-prefixes=CHECK,LSE
+; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone | FileCheck %s -check-prefixes=NOOUTLINE
+; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -mattr=+outline-atomics | FileCheck %s -check-prefixes=OUTLINE
+; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=cyclone -mattr=+lse | FileCheck %s -check-prefixes=LSE
@var = global i128 0
}
define void @fetch_and_nand(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_nand:
-; CHECK: // %bb.0:
-; CHECK-NEXT: .LBB4_1: // %atomicrmw.start
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldxp x9, x8, [x0]
-; CHECK-NEXT: and x10, x9, x2
-; CHECK-NEXT: and x11, x8, x3
-; CHECK-NEXT: mvn x11, x11
-; CHECK-NEXT: mvn x10, x10
-; CHECK-NEXT: stlxp w12, x10, x11, [x0]
-; CHECK-NEXT: cbnz w12, .LBB4_1
-; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: adrp x10, :got:var
-; CHECK-NEXT: ldr x10, [x10, :got_lo12:var]
-; CHECK-NEXT: stp x9, x8, [x10]
-; CHECK-NEXT: ret
+; NOOUTLINE-LABEL: fetch_and_nand:
+; NOOUTLINE: // %bb.0:
+; NOOUTLINE-NEXT: .LBB4_1: // %atomicrmw.start
+; NOOUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT: ldxp x9, x8, [x0]
+; NOOUTLINE-NEXT: and x10, x9, x2
+; NOOUTLINE-NEXT: and x11, x8, x3
+; NOOUTLINE-NEXT: mvn x11, x11
+; NOOUTLINE-NEXT: mvn x10, x10
+; NOOUTLINE-NEXT: stlxp w12, x10, x11, [x0]
+; NOOUTLINE-NEXT: cbnz w12, .LBB4_1
+; NOOUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; NOOUTLINE-NEXT: adrp x10, :got:var
+; NOOUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; NOOUTLINE-NEXT: stp x9, x8, [x10]
+; NOOUTLINE-NEXT: ret
+;
+; OUTLINE-LABEL: fetch_and_nand:
+; OUTLINE: // %bb.0:
+; OUTLINE-NEXT: .LBB4_1: // %atomicrmw.start
+; OUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-NEXT: ldxp x9, x8, [x0]
+; OUTLINE-NEXT: and x10, x9, x2
+; OUTLINE-NEXT: and x11, x8, x3
+; OUTLINE-NEXT: mvn x11, x11
+; OUTLINE-NEXT: mvn x10, x10
+; OUTLINE-NEXT: stlxp w12, x10, x11, [x0]
+; OUTLINE-NEXT: cbnz w12, .LBB4_1
+; OUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-NEXT: adrp x10, :got:var
+; OUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; OUTLINE-NEXT: stp x9, x8, [x10]
+; OUTLINE-NEXT: ret
+;
+; LSE-LABEL: fetch_and_nand:
+; LSE: // %bb.0:
+; LSE-NEXT: ldp x4, x5, [x0]
+; LSE-NEXT: .LBB4_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: mov x7, x5
+; LSE-NEXT: mov x6, x4
+; LSE-NEXT: and x8, x7, x3
+; LSE-NEXT: and x9, x4, x2
+; LSE-NEXT: mvn x10, x9
+; LSE-NEXT: mvn x11, x8
+; LSE-NEXT: mov x4, x6
+; LSE-NEXT: mov x5, x7
+; LSE-NEXT: caspl x4, x5, x10, x11, [x0]
+; LSE-NEXT: cmp x5, x7
+; LSE-NEXT: ccmp x4, x6, #0, eq
+; LSE-NEXT: b.ne .LBB4_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: adrp x8, :got:var
+; LSE-NEXT: ldr x8, [x8, :got_lo12:var]
+; LSE-NEXT: stp x4, x5, [x8]
+; LSE-NEXT: ret
%val = atomicrmw nand i128* %p, i128 %bits release
store i128 %val, i128* @var, align 16
}
define void @fetch_and_or(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_or:
-; CHECK: // %bb.0:
-; CHECK-NEXT: .LBB5_1: // %atomicrmw.start
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxp x9, x8, [x0]
-; CHECK-NEXT: orr x10, x8, x3
-; CHECK-NEXT: orr x11, x9, x2
-; CHECK-NEXT: stlxp w12, x11, x10, [x0]
-; CHECK-NEXT: cbnz w12, .LBB5_1
-; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: adrp x10, :got:var
-; CHECK-NEXT: ldr x10, [x10, :got_lo12:var]
-; CHECK-NEXT: stp x9, x8, [x10]
-; CHECK-NEXT: ret
+; NOOUTLINE-LABEL: fetch_and_or:
+; NOOUTLINE: // %bb.0:
+; NOOUTLINE-NEXT: .LBB5_1: // %atomicrmw.start
+; NOOUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT: ldaxp x9, x8, [x0]
+; NOOUTLINE-NEXT: orr x10, x8, x3
+; NOOUTLINE-NEXT: orr x11, x9, x2
+; NOOUTLINE-NEXT: stlxp w12, x11, x10, [x0]
+; NOOUTLINE-NEXT: cbnz w12, .LBB5_1
+; NOOUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; NOOUTLINE-NEXT: adrp x10, :got:var
+; NOOUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; NOOUTLINE-NEXT: stp x9, x8, [x10]
+; NOOUTLINE-NEXT: ret
+;
+; OUTLINE-LABEL: fetch_and_or:
+; OUTLINE: // %bb.0:
+; OUTLINE-NEXT: .LBB5_1: // %atomicrmw.start
+; OUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-NEXT: ldaxp x9, x8, [x0]
+; OUTLINE-NEXT: orr x10, x8, x3
+; OUTLINE-NEXT: orr x11, x9, x2
+; OUTLINE-NEXT: stlxp w12, x11, x10, [x0]
+; OUTLINE-NEXT: cbnz w12, .LBB5_1
+; OUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-NEXT: adrp x10, :got:var
+; OUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; OUTLINE-NEXT: stp x9, x8, [x10]
+; OUTLINE-NEXT: ret
+;
+; LSE-LABEL: fetch_and_or:
+; LSE: // %bb.0:
+; LSE-NEXT: ldp x4, x5, [x0]
+; LSE-NEXT: .LBB5_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: mov x7, x5
+; LSE-NEXT: mov x6, x4
+; LSE-NEXT: orr x8, x4, x2
+; LSE-NEXT: orr x9, x7, x3
+; LSE-NEXT: mov x4, x6
+; LSE-NEXT: mov x5, x7
+; LSE-NEXT: caspal x4, x5, x8, x9, [x0]
+; LSE-NEXT: cmp x5, x7
+; LSE-NEXT: ccmp x4, x6, #0, eq
+; LSE-NEXT: b.ne .LBB5_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: adrp x8, :got:var
+; LSE-NEXT: ldr x8, [x8, :got_lo12:var]
+; LSE-NEXT: stp x4, x5, [x8]
+; LSE-NEXT: ret
%val = atomicrmw or i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
}
define void @fetch_and_add(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_add:
-; CHECK: // %bb.0:
-; CHECK-NEXT: .LBB6_1: // %atomicrmw.start
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxp x9, x8, [x0]
-; CHECK-NEXT: adds x10, x9, x2
-; CHECK-NEXT: adc x11, x8, x3
-; CHECK-NEXT: stlxp w12, x10, x11, [x0]
-; CHECK-NEXT: cbnz w12, .LBB6_1
-; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: adrp x10, :got:var
-; CHECK-NEXT: ldr x10, [x10, :got_lo12:var]
-; CHECK-NEXT: stp x9, x8, [x10]
-; CHECK-NEXT: ret
-
+; NOOUTLINE-LABEL: fetch_and_add:
+; NOOUTLINE: // %bb.0:
+; NOOUTLINE-NEXT: .LBB6_1: // %atomicrmw.start
+; NOOUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT: ldaxp x9, x8, [x0]
+; NOOUTLINE-NEXT: adds x10, x9, x2
+; NOOUTLINE-NEXT: adc x11, x8, x3
+; NOOUTLINE-NEXT: stlxp w12, x10, x11, [x0]
+; NOOUTLINE-NEXT: cbnz w12, .LBB6_1
+; NOOUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; NOOUTLINE-NEXT: adrp x10, :got:var
+; NOOUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; NOOUTLINE-NEXT: stp x9, x8, [x10]
+; NOOUTLINE-NEXT: ret
+;
+; OUTLINE-LABEL: fetch_and_add:
+; OUTLINE: // %bb.0:
+; OUTLINE-NEXT: .LBB6_1: // %atomicrmw.start
+; OUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-NEXT: ldaxp x9, x8, [x0]
+; OUTLINE-NEXT: adds x10, x9, x2
+; OUTLINE-NEXT: adc x11, x8, x3
+; OUTLINE-NEXT: stlxp w12, x10, x11, [x0]
+; OUTLINE-NEXT: cbnz w12, .LBB6_1
+; OUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-NEXT: adrp x10, :got:var
+; OUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; OUTLINE-NEXT: stp x9, x8, [x10]
+; OUTLINE-NEXT: ret
+;
+; LSE-LABEL: fetch_and_add:
+; LSE: // %bb.0:
+; LSE-NEXT: ldp x4, x5, [x0]
+; LSE-NEXT: .LBB6_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: mov x7, x5
+; LSE-NEXT: mov x6, x4
+; LSE-NEXT: adds x8, x4, x2
+; LSE-NEXT: adc x9, x7, x3
+; LSE-NEXT: mov x4, x6
+; LSE-NEXT: mov x5, x7
+; LSE-NEXT: caspal x4, x5, x8, x9, [x0]
+; LSE-NEXT: cmp x5, x7
+; LSE-NEXT: ccmp x4, x6, #0, eq
+; LSE-NEXT: b.ne .LBB6_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: adrp x8, :got:var
+; LSE-NEXT: ldr x8, [x8, :got_lo12:var]
+; LSE-NEXT: stp x4, x5, [x8]
+; LSE-NEXT: ret
%val = atomicrmw add i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
}
define void @fetch_and_sub(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_sub:
-; CHECK: // %bb.0:
-; CHECK-NEXT: .LBB7_1: // %atomicrmw.start
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxp x9, x8, [x0]
-; CHECK-NEXT: subs x10, x9, x2
-; CHECK-NEXT: sbc x11, x8, x3
-; CHECK-NEXT: stlxp w12, x10, x11, [x0]
-; CHECK-NEXT: cbnz w12, .LBB7_1
-; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: adrp x10, :got:var
-; CHECK-NEXT: ldr x10, [x10, :got_lo12:var]
-; CHECK-NEXT: stp x9, x8, [x10]
-; CHECK-NEXT: ret
-
+; NOOUTLINE-LABEL: fetch_and_sub:
+; NOOUTLINE: // %bb.0:
+; NOOUTLINE-NEXT: .LBB7_1: // %atomicrmw.start
+; NOOUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT: ldaxp x9, x8, [x0]
+; NOOUTLINE-NEXT: subs x10, x9, x2
+; NOOUTLINE-NEXT: sbc x11, x8, x3
+; NOOUTLINE-NEXT: stlxp w12, x10, x11, [x0]
+; NOOUTLINE-NEXT: cbnz w12, .LBB7_1
+; NOOUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; NOOUTLINE-NEXT: adrp x10, :got:var
+; NOOUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; NOOUTLINE-NEXT: stp x9, x8, [x10]
+; NOOUTLINE-NEXT: ret
+;
+; OUTLINE-LABEL: fetch_and_sub:
+; OUTLINE: // %bb.0:
+; OUTLINE-NEXT: .LBB7_1: // %atomicrmw.start
+; OUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-NEXT: ldaxp x9, x8, [x0]
+; OUTLINE-NEXT: subs x10, x9, x2
+; OUTLINE-NEXT: sbc x11, x8, x3
+; OUTLINE-NEXT: stlxp w12, x10, x11, [x0]
+; OUTLINE-NEXT: cbnz w12, .LBB7_1
+; OUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-NEXT: adrp x10, :got:var
+; OUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; OUTLINE-NEXT: stp x9, x8, [x10]
+; OUTLINE-NEXT: ret
+;
+; LSE-LABEL: fetch_and_sub:
+; LSE: // %bb.0:
+; LSE-NEXT: ldp x4, x5, [x0]
+; LSE-NEXT: .LBB7_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: mov x7, x5
+; LSE-NEXT: mov x6, x4
+; LSE-NEXT: subs x8, x4, x2
+; LSE-NEXT: sbc x9, x7, x3
+; LSE-NEXT: mov x4, x6
+; LSE-NEXT: mov x5, x7
+; LSE-NEXT: caspal x4, x5, x8, x9, [x0]
+; LSE-NEXT: cmp x5, x7
+; LSE-NEXT: ccmp x4, x6, #0, eq
+; LSE-NEXT: b.ne .LBB7_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: adrp x8, :got:var
+; LSE-NEXT: ldr x8, [x8, :got_lo12:var]
+; LSE-NEXT: stp x4, x5, [x8]
+; LSE-NEXT: ret
%val = atomicrmw sub i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
}
define void @fetch_and_min(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_min:
-; CHECK: // %bb.0:
-; CHECK-NEXT: .LBB8_1: // %atomicrmw.start
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxp x9, x8, [x0]
-; CHECK-NEXT: cmp x2, x9
-; CHECK-NEXT: sbcs xzr, x3, x8
-; CHECK-NEXT: csel x10, x8, x3, ge
-; CHECK-NEXT: csel x11, x9, x2, ge
-; CHECK-NEXT: stlxp w12, x11, x10, [x0]
-; CHECK-NEXT: cbnz w12, .LBB8_1
-; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: adrp x10, :got:var
-; CHECK-NEXT: ldr x10, [x10, :got_lo12:var]
-; CHECK-NEXT: stp x9, x8, [x10]
-; CHECK-NEXT: ret
-
+; NOOUTLINE-LABEL: fetch_and_min:
+; NOOUTLINE: // %bb.0:
+; NOOUTLINE-NEXT: .LBB8_1: // %atomicrmw.start
+; NOOUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT: ldaxp x9, x8, [x0]
+; NOOUTLINE-NEXT: cmp x2, x9
+; NOOUTLINE-NEXT: sbcs xzr, x3, x8
+; NOOUTLINE-NEXT: csel x10, x8, x3, ge
+; NOOUTLINE-NEXT: csel x11, x9, x2, ge
+; NOOUTLINE-NEXT: stlxp w12, x11, x10, [x0]
+; NOOUTLINE-NEXT: cbnz w12, .LBB8_1
+; NOOUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; NOOUTLINE-NEXT: adrp x10, :got:var
+; NOOUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; NOOUTLINE-NEXT: stp x9, x8, [x10]
+; NOOUTLINE-NEXT: ret
+;
+; OUTLINE-LABEL: fetch_and_min:
+; OUTLINE: // %bb.0:
+; OUTLINE-NEXT: .LBB8_1: // %atomicrmw.start
+; OUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-NEXT: ldaxp x9, x8, [x0]
+; OUTLINE-NEXT: cmp x2, x9
+; OUTLINE-NEXT: sbcs xzr, x3, x8
+; OUTLINE-NEXT: csel x10, x8, x3, ge
+; OUTLINE-NEXT: csel x11, x9, x2, ge
+; OUTLINE-NEXT: stlxp w12, x11, x10, [x0]
+; OUTLINE-NEXT: cbnz w12, .LBB8_1
+; OUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-NEXT: adrp x10, :got:var
+; OUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; OUTLINE-NEXT: stp x9, x8, [x10]
+; OUTLINE-NEXT: ret
+;
+; LSE-LABEL: fetch_and_min:
+; LSE: // %bb.0:
+; LSE-NEXT: ldp x4, x5, [x0]
+; LSE-NEXT: .LBB8_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: mov x7, x5
+; LSE-NEXT: mov x6, x4
+; LSE-NEXT: cmp x2, x4
+; LSE-NEXT: sbcs xzr, x3, x7
+; LSE-NEXT: csel x9, x7, x3, ge
+; LSE-NEXT: csel x8, x4, x2, ge
+; LSE-NEXT: mov x4, x6
+; LSE-NEXT: mov x5, x7
+; LSE-NEXT: caspal x4, x5, x8, x9, [x0]
+; LSE-NEXT: cmp x5, x7
+; LSE-NEXT: ccmp x4, x6, #0, eq
+; LSE-NEXT: b.ne .LBB8_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: adrp x8, :got:var
+; LSE-NEXT: ldr x8, [x8, :got_lo12:var]
+; LSE-NEXT: stp x4, x5, [x8]
+; LSE-NEXT: ret
%val = atomicrmw min i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
}
define void @fetch_and_max(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_max:
-; CHECK: // %bb.0:
-; CHECK-NEXT: .LBB9_1: // %atomicrmw.start
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxp x9, x8, [x0]
-; CHECK-NEXT: cmp x2, x9
-; CHECK-NEXT: sbcs xzr, x3, x8
-; CHECK-NEXT: csel x10, x8, x3, lt
-; CHECK-NEXT: csel x11, x9, x2, lt
-; CHECK-NEXT: stlxp w12, x11, x10, [x0]
-; CHECK-NEXT: cbnz w12, .LBB9_1
-; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: adrp x10, :got:var
-; CHECK-NEXT: ldr x10, [x10, :got_lo12:var]
-; CHECK-NEXT: stp x9, x8, [x10]
-; CHECK-NEXT: ret
-
+; NOOUTLINE-LABEL: fetch_and_max:
+; NOOUTLINE: // %bb.0:
+; NOOUTLINE-NEXT: .LBB9_1: // %atomicrmw.start
+; NOOUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT: ldaxp x9, x8, [x0]
+; NOOUTLINE-NEXT: cmp x2, x9
+; NOOUTLINE-NEXT: sbcs xzr, x3, x8
+; NOOUTLINE-NEXT: csel x10, x8, x3, lt
+; NOOUTLINE-NEXT: csel x11, x9, x2, lt
+; NOOUTLINE-NEXT: stlxp w12, x11, x10, [x0]
+; NOOUTLINE-NEXT: cbnz w12, .LBB9_1
+; NOOUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; NOOUTLINE-NEXT: adrp x10, :got:var
+; NOOUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; NOOUTLINE-NEXT: stp x9, x8, [x10]
+; NOOUTLINE-NEXT: ret
+;
+; OUTLINE-LABEL: fetch_and_max:
+; OUTLINE: // %bb.0:
+; OUTLINE-NEXT: .LBB9_1: // %atomicrmw.start
+; OUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-NEXT: ldaxp x9, x8, [x0]
+; OUTLINE-NEXT: cmp x2, x9
+; OUTLINE-NEXT: sbcs xzr, x3, x8
+; OUTLINE-NEXT: csel x10, x8, x3, lt
+; OUTLINE-NEXT: csel x11, x9, x2, lt
+; OUTLINE-NEXT: stlxp w12, x11, x10, [x0]
+; OUTLINE-NEXT: cbnz w12, .LBB9_1
+; OUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-NEXT: adrp x10, :got:var
+; OUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; OUTLINE-NEXT: stp x9, x8, [x10]
+; OUTLINE-NEXT: ret
+;
+; LSE-LABEL: fetch_and_max:
+; LSE: // %bb.0:
+; LSE-NEXT: ldp x4, x5, [x0]
+; LSE-NEXT: .LBB9_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: mov x7, x5
+; LSE-NEXT: mov x6, x4
+; LSE-NEXT: cmp x2, x4
+; LSE-NEXT: sbcs xzr, x3, x7
+; LSE-NEXT: csel x9, x7, x3, lt
+; LSE-NEXT: csel x8, x4, x2, lt
+; LSE-NEXT: mov x4, x6
+; LSE-NEXT: mov x5, x7
+; LSE-NEXT: caspal x4, x5, x8, x9, [x0]
+; LSE-NEXT: cmp x5, x7
+; LSE-NEXT: ccmp x4, x6, #0, eq
+; LSE-NEXT: b.ne .LBB9_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: adrp x8, :got:var
+; LSE-NEXT: ldr x8, [x8, :got_lo12:var]
+; LSE-NEXT: stp x4, x5, [x8]
+; LSE-NEXT: ret
%val = atomicrmw max i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
}
define void @fetch_and_umin(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_umin:
-; CHECK: // %bb.0:
-; CHECK-NEXT: .LBB10_1: // %atomicrmw.start
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxp x9, x8, [x0]
-; CHECK-NEXT: cmp x2, x9
-; CHECK-NEXT: sbcs xzr, x3, x8
-; CHECK-NEXT: csel x10, x8, x3, hs
-; CHECK-NEXT: csel x11, x9, x2, hs
-; CHECK-NEXT: stlxp w12, x11, x10, [x0]
-; CHECK-NEXT: cbnz w12, .LBB10_1
-; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: adrp x10, :got:var
-; CHECK-NEXT: ldr x10, [x10, :got_lo12:var]
-; CHECK-NEXT: stp x9, x8, [x10]
-; CHECK-NEXT: ret
-
+; NOOUTLINE-LABEL: fetch_and_umin:
+; NOOUTLINE: // %bb.0:
+; NOOUTLINE-NEXT: .LBB10_1: // %atomicrmw.start
+; NOOUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT: ldaxp x9, x8, [x0]
+; NOOUTLINE-NEXT: cmp x2, x9
+; NOOUTLINE-NEXT: sbcs xzr, x3, x8
+; NOOUTLINE-NEXT: csel x10, x8, x3, hs
+; NOOUTLINE-NEXT: csel x11, x9, x2, hs
+; NOOUTLINE-NEXT: stlxp w12, x11, x10, [x0]
+; NOOUTLINE-NEXT: cbnz w12, .LBB10_1
+; NOOUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; NOOUTLINE-NEXT: adrp x10, :got:var
+; NOOUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; NOOUTLINE-NEXT: stp x9, x8, [x10]
+; NOOUTLINE-NEXT: ret
+;
+; OUTLINE-LABEL: fetch_and_umin:
+; OUTLINE: // %bb.0:
+; OUTLINE-NEXT: .LBB10_1: // %atomicrmw.start
+; OUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-NEXT: ldaxp x9, x8, [x0]
+; OUTLINE-NEXT: cmp x2, x9
+; OUTLINE-NEXT: sbcs xzr, x3, x8
+; OUTLINE-NEXT: csel x10, x8, x3, hs
+; OUTLINE-NEXT: csel x11, x9, x2, hs
+; OUTLINE-NEXT: stlxp w12, x11, x10, [x0]
+; OUTLINE-NEXT: cbnz w12, .LBB10_1
+; OUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-NEXT: adrp x10, :got:var
+; OUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; OUTLINE-NEXT: stp x9, x8, [x10]
+; OUTLINE-NEXT: ret
+;
+; LSE-LABEL: fetch_and_umin:
+; LSE: // %bb.0:
+; LSE-NEXT: ldp x4, x5, [x0]
+; LSE-NEXT: .LBB10_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: mov x7, x5
+; LSE-NEXT: mov x6, x4
+; LSE-NEXT: cmp x2, x4
+; LSE-NEXT: sbcs xzr, x3, x7
+; LSE-NEXT: csel x9, x7, x3, hs
+; LSE-NEXT: csel x8, x4, x2, hs
+; LSE-NEXT: mov x4, x6
+; LSE-NEXT: mov x5, x7
+; LSE-NEXT: caspal x4, x5, x8, x9, [x0]
+; LSE-NEXT: cmp x5, x7
+; LSE-NEXT: ccmp x4, x6, #0, eq
+; LSE-NEXT: b.ne .LBB10_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: adrp x8, :got:var
+; LSE-NEXT: ldr x8, [x8, :got_lo12:var]
+; LSE-NEXT: stp x4, x5, [x8]
+; LSE-NEXT: ret
%val = atomicrmw umin i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
}
define void @fetch_and_umax(i128* %p, i128 %bits) {
-; CHECK-LABEL: fetch_and_umax:
-; CHECK: // %bb.0:
-; CHECK-NEXT: .LBB11_1: // %atomicrmw.start
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxp x9, x8, [x0]
-; CHECK-NEXT: cmp x2, x9
-; CHECK-NEXT: sbcs xzr, x3, x8
-; CHECK-NEXT: csel x10, x8, x3, lo
-; CHECK-NEXT: csel x11, x9, x2, lo
-; CHECK-NEXT: stlxp w12, x11, x10, [x0]
-; CHECK-NEXT: cbnz w12, .LBB11_1
-; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: adrp x10, :got:var
-; CHECK-NEXT: ldr x10, [x10, :got_lo12:var]
-; CHECK-NEXT: stp x9, x8, [x10]
-; CHECK-NEXT: ret
-
+; NOOUTLINE-LABEL: fetch_and_umax:
+; NOOUTLINE: // %bb.0:
+; NOOUTLINE-NEXT: .LBB11_1: // %atomicrmw.start
+; NOOUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT: ldaxp x9, x8, [x0]
+; NOOUTLINE-NEXT: cmp x2, x9
+; NOOUTLINE-NEXT: sbcs xzr, x3, x8
+; NOOUTLINE-NEXT: csel x10, x8, x3, lo
+; NOOUTLINE-NEXT: csel x11, x9, x2, lo
+; NOOUTLINE-NEXT: stlxp w12, x11, x10, [x0]
+; NOOUTLINE-NEXT: cbnz w12, .LBB11_1
+; NOOUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; NOOUTLINE-NEXT: adrp x10, :got:var
+; NOOUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; NOOUTLINE-NEXT: stp x9, x8, [x10]
+; NOOUTLINE-NEXT: ret
+;
+; OUTLINE-LABEL: fetch_and_umax:
+; OUTLINE: // %bb.0:
+; OUTLINE-NEXT: .LBB11_1: // %atomicrmw.start
+; OUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-NEXT: ldaxp x9, x8, [x0]
+; OUTLINE-NEXT: cmp x2, x9
+; OUTLINE-NEXT: sbcs xzr, x3, x8
+; OUTLINE-NEXT: csel x10, x8, x3, lo
+; OUTLINE-NEXT: csel x11, x9, x2, lo
+; OUTLINE-NEXT: stlxp w12, x11, x10, [x0]
+; OUTLINE-NEXT: cbnz w12, .LBB11_1
+; OUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-NEXT: adrp x10, :got:var
+; OUTLINE-NEXT: ldr x10, [x10, :got_lo12:var]
+; OUTLINE-NEXT: stp x9, x8, [x10]
+; OUTLINE-NEXT: ret
+;
+; LSE-LABEL: fetch_and_umax:
+; LSE: // %bb.0:
+; LSE-NEXT: ldp x4, x5, [x0]
+; LSE-NEXT: .LBB11_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: mov x7, x5
+; LSE-NEXT: mov x6, x4
+; LSE-NEXT: cmp x2, x4
+; LSE-NEXT: sbcs xzr, x3, x7
+; LSE-NEXT: csel x9, x7, x3, lo
+; LSE-NEXT: csel x8, x4, x2, lo
+; LSE-NEXT: mov x4, x6
+; LSE-NEXT: mov x5, x7
+; LSE-NEXT: caspal x4, x5, x8, x9, [x0]
+; LSE-NEXT: cmp x5, x7
+; LSE-NEXT: ccmp x4, x6, #0, eq
+; LSE-NEXT: b.ne .LBB11_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: adrp x8, :got:var
+; LSE-NEXT: ldr x8, [x8, :got_lo12:var]
+; LSE-NEXT: stp x4, x5, [x8]
+; LSE-NEXT: ret
%val = atomicrmw umax i128* %p, i128 %bits seq_cst
store i128 %val, i128* @var, align 16
ret void
}
define i128 @atomic_load_seq_cst(i128* %p) {
-; CHECK-LABEL: atomic_load_seq_cst:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, x0
-; CHECK-NEXT: .LBB12_1: // %atomicrmw.start
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxp x0, x1, [x8]
-; CHECK-NEXT: stlxp w9, x0, x1, [x8]
-; CHECK-NEXT: cbnz w9, .LBB12_1
-; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: ret
+; NOOUTLINE-LABEL: atomic_load_seq_cst:
+; NOOUTLINE: // %bb.0:
+; NOOUTLINE-NEXT: mov x8, x0
+; NOOUTLINE-NEXT: .LBB12_1: // %atomicrmw.start
+; NOOUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT: ldaxp x0, x1, [x8]
+; NOOUTLINE-NEXT: stlxp w9, x0, x1, [x8]
+; NOOUTLINE-NEXT: cbnz w9, .LBB12_1
+; NOOUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; NOOUTLINE-NEXT: ret
+;
+; OUTLINE-LABEL: atomic_load_seq_cst:
+; OUTLINE: // %bb.0:
+; OUTLINE-NEXT: mov x8, x0
+; OUTLINE-NEXT: .LBB12_1: // %atomicrmw.start
+; OUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-NEXT: ldaxp x0, x1, [x8]
+; OUTLINE-NEXT: stlxp w9, x0, x1, [x8]
+; OUTLINE-NEXT: cbnz w9, .LBB12_1
+; OUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-NEXT: ret
+;
+; LSE-LABEL: atomic_load_seq_cst:
+; LSE: // %bb.0:
+; LSE-NEXT: mov x2, #0
+; LSE-NEXT: mov x3, #0
+; LSE-NEXT: caspal x2, x3, x2, x3, [x0]
+; LSE-NEXT: mov x0, x2
+; LSE-NEXT: mov x1, x3
+; LSE-NEXT: ret
%r = load atomic i128, i128* %p seq_cst, align 16
ret i128 %r
}
define i128 @atomic_load_relaxed(i64, i64, i128* %p) {
-; CHECK-LABEL: atomic_load_relaxed:
-; CHECK: // %bb.0:
-; CHECK-NEXT: .LBB13_1: // %atomicrmw.start
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldxp x0, x1, [x2]
-; CHECK-NEXT: stxp w8, x0, x1, [x2]
-; CHECK-NEXT: cbnz w8, .LBB13_1
-; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: ret
+; NOOUTLINE-LABEL: atomic_load_relaxed:
+; NOOUTLINE: // %bb.0:
+; NOOUTLINE-NEXT: .LBB13_1: // %atomicrmw.start
+; NOOUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT: ldxp x0, x1, [x2]
+; NOOUTLINE-NEXT: stxp w8, x0, x1, [x2]
+; NOOUTLINE-NEXT: cbnz w8, .LBB13_1
+; NOOUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; NOOUTLINE-NEXT: ret
+;
+; OUTLINE-LABEL: atomic_load_relaxed:
+; OUTLINE: // %bb.0:
+; OUTLINE-NEXT: .LBB13_1: // %atomicrmw.start
+; OUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-NEXT: ldxp x0, x1, [x2]
+; OUTLINE-NEXT: stxp w8, x0, x1, [x2]
+; OUTLINE-NEXT: cbnz w8, .LBB13_1
+; OUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-NEXT: ret
+;
+; LSE-LABEL: atomic_load_relaxed:
+; LSE: // %bb.0:
+; LSE-NEXT: mov x0, #0
+; LSE-NEXT: mov x1, #0
+; LSE-NEXT: casp x0, x1, x0, x1, [x2]
+; LSE-NEXT: ret
%r = load atomic i128, i128* %p monotonic, align 16
ret i128 %r
}
define void @atomic_store_seq_cst(i128 %in, i128* %p) {
-; CHECK-LABEL: atomic_store_seq_cst:
-; CHECK: // %bb.0:
-; CHECK-NEXT: .LBB14_1: // %atomicrmw.start
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxp xzr, x8, [x2]
-; CHECK-NEXT: stlxp w8, x0, x1, [x2]
-; CHECK-NEXT: cbnz w8, .LBB14_1
-; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: ret
+; NOOUTLINE-LABEL: atomic_store_seq_cst:
+; NOOUTLINE: // %bb.0:
+; NOOUTLINE-NEXT: .LBB14_1: // %atomicrmw.start
+; NOOUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT: ldaxp xzr, x8, [x2]
+; NOOUTLINE-NEXT: stlxp w8, x0, x1, [x2]
+; NOOUTLINE-NEXT: cbnz w8, .LBB14_1
+; NOOUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; NOOUTLINE-NEXT: ret
+;
+; OUTLINE-LABEL: atomic_store_seq_cst:
+; OUTLINE: // %bb.0:
+; OUTLINE-NEXT: .LBB14_1: // %atomicrmw.start
+; OUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-NEXT: ldaxp xzr, x8, [x2]
+; OUTLINE-NEXT: stlxp w8, x0, x1, [x2]
+; OUTLINE-NEXT: cbnz w8, .LBB14_1
+; OUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-NEXT: ret
+;
+; LSE-LABEL: atomic_store_seq_cst:
+; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; LSE-NEXT: ldp x4, x5, [x2]
+; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; LSE-NEXT: .LBB14_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: mov x6, x4
+; LSE-NEXT: mov x7, x5
+; LSE-NEXT: caspal x6, x7, x0, x1, [x2]
+; LSE-NEXT: cmp x7, x5
+; LSE-NEXT: ccmp x6, x4, #0, eq
+; LSE-NEXT: mov x4, x6
+; LSE-NEXT: mov x5, x7
+; LSE-NEXT: b.ne .LBB14_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: ret
store atomic i128 %in, i128* %p seq_cst, align 16
ret void
}
define void @atomic_store_release(i128 %in, i128* %p) {
-; CHECK-LABEL: atomic_store_release:
-; CHECK: // %bb.0:
-; CHECK-NEXT: .LBB15_1: // %atomicrmw.start
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldxp xzr, x8, [x2]
-; CHECK-NEXT: stlxp w8, x0, x1, [x2]
-; CHECK-NEXT: cbnz w8, .LBB15_1
-; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: ret
+; NOOUTLINE-LABEL: atomic_store_release:
+; NOOUTLINE: // %bb.0:
+; NOOUTLINE-NEXT: .LBB15_1: // %atomicrmw.start
+; NOOUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT: ldxp xzr, x8, [x2]
+; NOOUTLINE-NEXT: stlxp w8, x0, x1, [x2]
+; NOOUTLINE-NEXT: cbnz w8, .LBB15_1
+; NOOUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; NOOUTLINE-NEXT: ret
+;
+; OUTLINE-LABEL: atomic_store_release:
+; OUTLINE: // %bb.0:
+; OUTLINE-NEXT: .LBB15_1: // %atomicrmw.start
+; OUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-NEXT: ldxp xzr, x8, [x2]
+; OUTLINE-NEXT: stlxp w8, x0, x1, [x2]
+; OUTLINE-NEXT: cbnz w8, .LBB15_1
+; OUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-NEXT: ret
+;
+; LSE-LABEL: atomic_store_release:
+; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; LSE-NEXT: ldp x4, x5, [x2]
+; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; LSE-NEXT: .LBB15_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: mov x6, x4
+; LSE-NEXT: mov x7, x5
+; LSE-NEXT: caspl x6, x7, x0, x1, [x2]
+; LSE-NEXT: cmp x7, x5
+; LSE-NEXT: ccmp x6, x4, #0, eq
+; LSE-NEXT: mov x4, x6
+; LSE-NEXT: mov x5, x7
+; LSE-NEXT: b.ne .LBB15_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: ret
store atomic i128 %in, i128* %p release, align 16
ret void
}
define void @atomic_store_relaxed(i128 %in, i128* %p) {
-; CHECK-LABEL: atomic_store_relaxed:
-; CHECK: // %bb.0:
-; CHECK-NEXT: .LBB16_1: // %atomicrmw.start
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldxp xzr, x8, [x2]
-; CHECK-NEXT: stxp w8, x0, x1, [x2]
-; CHECK-NEXT: cbnz w8, .LBB16_1
-; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: ret
+; NOOUTLINE-LABEL: atomic_store_relaxed:
+; NOOUTLINE: // %bb.0:
+; NOOUTLINE-NEXT: .LBB16_1: // %atomicrmw.start
+; NOOUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT: ldxp xzr, x8, [x2]
+; NOOUTLINE-NEXT: stxp w8, x0, x1, [x2]
+; NOOUTLINE-NEXT: cbnz w8, .LBB16_1
+; NOOUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; NOOUTLINE-NEXT: ret
+;
+; OUTLINE-LABEL: atomic_store_relaxed:
+; OUTLINE: // %bb.0:
+; OUTLINE-NEXT: .LBB16_1: // %atomicrmw.start
+; OUTLINE-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-NEXT: ldxp xzr, x8, [x2]
+; OUTLINE-NEXT: stxp w8, x0, x1, [x2]
+; OUTLINE-NEXT: cbnz w8, .LBB16_1
+; OUTLINE-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-NEXT: ret
+;
+; LSE-LABEL: atomic_store_relaxed:
+; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; LSE-NEXT: ldp x4, x5, [x2]
+; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; LSE-NEXT: .LBB16_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: mov x6, x4
+; LSE-NEXT: mov x7, x5
+; LSE-NEXT: casp x6, x7, x0, x1, [x2]
+; LSE-NEXT: cmp x7, x5
+; LSE-NEXT: ccmp x6, x4, #0, eq
+; LSE-NEXT: mov x4, x6
+; LSE-NEXT: mov x5, x7
+; LSE-NEXT: b.ne .LBB16_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: ret
store atomic i128 %in, i128* %p unordered, align 16
ret void
}