return false;
const TargetRegisterClass *ResRC;
- unsigned Opc;
+ unsigned Opc, CmpOpc;
// This only supports i32/i64, because i8/i16 aren't legal, and the generic
// extractvalue selection doesn't support that.
if (VT == MVT::i32) {
Opc = AArch64::CMP_SWAP_32;
+ CmpOpc = AArch64::SUBSWrs;
ResRC = &AArch64::GPR32RegClass;
} else if (VT == MVT::i64) {
Opc = AArch64::CMP_SWAP_64;
+ CmpOpc = AArch64::SUBSXrs;
ResRC = &AArch64::GPR64RegClass;
} else {
return false;
const unsigned ResultReg1 = createResultReg(ResRC);
const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
+ const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
// FIXME: MachineMemOperand doesn't support cmpxchg yet.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(ResultReg1, RegState::Define)
- .addReg(ResultReg2, RegState::Define)
- .addReg(AddrReg)
- .addReg(DesiredReg)
- .addReg(NewReg);
+ .addDef(ResultReg1)
+ .addDef(ScratchReg)
+ .addUse(AddrReg)
+ .addUse(DesiredReg)
+ .addUse(NewReg);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
+ .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
+ .addUse(ResultReg1)
+ .addUse(DesiredReg)
+ .addImm(0);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
+ .addDef(ResultReg2)
+ .addUse(AArch64::WZR)
+ .addUse(AArch64::WZR)
+ .addImm(AArch64CC::NE);
assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
updateValueMap(I, ResultReg1, 2);
// significantly more naive than the standard expansion: we conservatively
// assume seq_cst, strong cmpxchg and omit clrex on failure.
-let Constraints = "@earlyclobber $Rd,@earlyclobber $status",
+let Constraints = "@earlyclobber $Rd,@earlyclobber $scratch",
mayLoad = 1, mayStore = 1 in {
-def CMP_SWAP_8 : Pseudo<(outs GPR32:$Rd, GPR32:$status),
+def CMP_SWAP_8 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
(ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
Sched<[WriteAtomic]>;
-def CMP_SWAP_16 : Pseudo<(outs GPR32:$Rd, GPR32:$status),
+def CMP_SWAP_16 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
(ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
Sched<[WriteAtomic]>;
-def CMP_SWAP_32 : Pseudo<(outs GPR32:$Rd, GPR32:$status),
+def CMP_SWAP_32 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
(ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
Sched<[WriteAtomic]>;
-def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$status),
+def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch),
(ins GPR64:$addr, GPR64:$desired, GPR64:$new), []>,
Sched<[WriteAtomic]>;
}
-let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $status",
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch",
mayLoad = 1, mayStore = 1 in
-def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$status),
+def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch),
(ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
GPR64:$newLo, GPR64:$newHi), []>,
Sched<[WriteAtomic]>;
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
+; CHECK-NEXT: cmp [[OLD]], w1
+; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq
; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
; CHECK-NEXT: str [[STATUS32]], [x3]
; CHECK-NEXT: mov w0, [[OLD]]
-; CHECK-NEXT: ret
define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 {
%tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new monotonic monotonic
%tmp1 = extractvalue { i32, i1 } %tmp0, 0
; CHECK-LABEL: cmpxchg_acq_rel_32_load:
; CHECK: // BB#0:
-; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2]
+; CHECK: ldr [[NEW:w[0-9]+]], [x2]
; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]:
; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
; CHECK-NEXT: cmp [[OLD]], w1
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
+; CHECK-NEXT: cmp [[OLD]], w1
+; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq
; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
; CHECK-NEXT: str [[STATUS32]], [x3]
; CHECK-NEXT: mov w0, [[OLD]]
-; CHECK-NEXT: ret
define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0 {
%new = load i32, i32* %pnew
%tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel acquire
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], x2, [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
+; CHECK-NEXT: cmp [[OLD]], x1
+; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq
; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
; CHECK-NEXT: str [[STATUS32]], [x3]
; CHECK-NEXT: mov x0, [[OLD]]
-; CHECK-NEXT: ret
define i64 @cmpxchg_seq_cst_64(i64* %p, i64 %cmp, i64 %new, i32* %ps) #0 {
%tmp0 = cmpxchg i64* %p, i64 %cmp, i64 %new seq_cst seq_cst
%tmp1 = extractvalue { i64, i1 } %tmp0, 0