/// Returns the correct STLXR opcode for a store of \p NumBytesToStore bytes,
/// used when selecting the @llvm.aarch64.stlxr intrinsic.
static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
  switch (NumBytesToStore) {
-  // TODO: 1, 2, and 4 byte stores.
+  // TODO: 1 and 2 byte stores
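+  // STLXRW is the word (32-bit) form of the store-release exclusive
+  // instruction; STLXRX below is the doubleword (64-bit) form.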
+  case 4:
+    return AArch64::STLXRW;
  case 8:
    return AArch64::STLXRX;
  default:
    // Unsupported store size; callers treat 0 as "cannot select".
    return 0;
  }
}
...
    unsigned Opc = getStlxrOpcode(NumBytesToStore);
    if (!Opc)
      return false;
-
-    auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
+    unsigned NumBitsToStore = NumBytesToStore * 8;
+    if (NumBitsToStore != 64) {
+      // The intrinsic always has a 64-bit source, but we might actually want
+      // a differently-sized source for the instruction. Try to get it.
+      // TODO: For 1- and 2-byte stores, the source will be wrapped in a G_AND.
+      // For now, just handle 4-byte stores.
+      // TODO: If we don't find a G_ZEXT, we'll have to truncate the value down
+      // to the right size for the STLXR.
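+      // For the 4-byte case, the incoming MIR looks like this (register names
+      // illustrative; see the MIR test below):
+      //   %src:gpr(s32) = COPY $w1
+      //   %zext:gpr(s64) = G_ZEXT %src(s32)
+      //   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %zext(s64), %ptr(p0)
+      // and we want to feed %src, rather than %zext, to the STLXRW.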
+      MachineInstr *Zext = getOpcodeDef(TargetOpcode::G_ZEXT, SrcReg, MRI);
+      if (!Zext)
+        return false;
+      SrcReg = Zext->getOperand(1).getReg();
+      // We should get an appropriately-sized register here.
+      if (RBI.getSizeInBits(SrcReg, MRI, TRI) != NumBitsToStore)
+        return false;
+    }
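+    // Transfer the intrinsic's memory operand to the STLXR so the selected
+    // instruction keeps its (volatile store ...) annotation; the updated
+    // CHECK lines in the MIR test verify this.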
+    auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg})
+                       .addMemOperand(*I.memoperands_begin());
    constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
  }
...
--- |
  define i32 @test_store_release_i64(i32 %a, i64* %addr) {
    ret i32 %a
  }
+
+  define i32 @test_store_release_i32(i32 %a, i64* %addr) {
+    ret i32 %a
+  }
...
---
name: test_store_release_i64
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $w0, $x1, $x2
    ; CHECK-LABEL: name: test_store_release_i64
    ; CHECK: liveins: $w0, $x1, $x2
    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x2
-    ; CHECK: early-clobber %2:gpr32 = STLXRX [[COPY]], [[COPY1]]
+    ; CHECK: early-clobber %2:gpr32 = STLXRX [[COPY]], [[COPY1]] :: (volatile store 8 into %ir.addr)
    ; CHECK: $w0 = COPY %2
    ; CHECK: RET_ReallyLR implicit $w0
    %1:gpr(s64) = COPY $x1
    %2:gpr(p0) = COPY $x2
    %3:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %1(s64), %2(p0) :: (volatile store 8 into %ir.addr)
    $w0 = COPY %3(s32)
    RET_ReallyLR implicit $w0
...
+---
+name: test_store_release_i32
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $x2
+    ; CHECK-LABEL: name: test_store_release_i32
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x2
+    ; CHECK: early-clobber %3:gpr32 = STLXRW [[COPY]], [[COPY1]] :: (volatile store 4 into %ir.addr)
+    ; CHECK: $w0 = COPY %3
+    ; CHECK: RET_ReallyLR implicit $w0
+    %1:gpr(s32) = COPY $w1
+    %2:gpr(p0) = COPY $x2
+    %3:gpr(s64) = G_ZEXT %1(s32)
+    %4:gpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.stlxr), %3(s64), %2(p0) :: (volatile store 4 into %ir.addr)
+    $w0 = COPY %4(s32)
+    RET_ReallyLR implicit $w0
+
+...
...
  ret i32 %res
}

+; FALLBACK-NOT: remark:{{.*}}test_store_release_i32
define i32 @test_store_release_i32(i32, i32 %val, i32* %addr) {
; CHECK-LABEL: test_store_release_i32:
; CHECK-NOT: uxtw
; CHECK-NOT: and
; CHECK: stlxr w0, w1, [x2]
+; GISEL-LABEL: test_store_release_i32:
+; GISEL-NOT: uxtw
+; GISEL-NOT: and
+; GISEL: stlxr w0, w1, [x2]
  %extval = zext i32 %val to i64
  %res = call i32 @llvm.aarch64.stlxr.p0i32(i64 %extval, i32* %addr)
  ret i32 %res
}