Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn]>;
+// GDS add-to-GS-register, returning a value (presumably the pre-op register
+// contents — confirm against the ISA docs). The result type is overloaded
+// (i32 or i64); the underlying instruction writes a 64-bit destination and
+// selection extracts the low half for the i32 form (see the GCNPats).
+// Arg 0: i32 value to add. Arg 1: immediate GS register offset (ImmArg).
+def int_amdgcn_ds_add_gs_reg_rtn :
+ GCCBuiltin<"__builtin_amdgcn_ds_add_gs_reg_rtn">,
+ Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<1>>, IntrHasSideEffects, IntrWillReturn]>;
+
+// GDS subtract-from-GS-register with return. Same operand and result
+// conventions as int_amdgcn_ds_add_gs_reg_rtn above.
+def int_amdgcn_ds_sub_gs_reg_rtn :
+ GCCBuiltin<"__builtin_amdgcn_ds_sub_gs_reg_rtn">,
+ Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<1>>, IntrHasSideEffects, IntrWillReturn]>;
+
//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
break;
}
+ case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
+ case Intrinsic::amdgcn_ds_sub_gs_reg_rtn:
+ // Result (operand 0) and data source (operand 2) must live in VGPRs.
+ // The remaining operands carry no register needing a bank mapping here.
+ OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ break;
default:
return getInvalidInstructionMapping();
}
(DS_ORDERED_COUNT $value, (as_i16imm $offset))
>;
+// i64 overload selects directly: DS_ADD_GS_REG_RTN produces a 64-bit result.
+def : GCNPat <
+ (i64 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
+ (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
+>;
+
+// i32 overload: the instruction still writes a 64-bit destination, so
+// constrain it to VReg_64 and take the low 32 bits (sub0).
+def : GCNPat <
+ (i32 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
+ (EXTRACT_SUBREG
+ (i64 (COPY_TO_REGCLASS
+ (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
+ VReg_64)),
+ sub0)
+>;
+
+// i64 overload of the subtract form; mirrors the add pattern above.
+def : GCNPat <
+ (i64 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
+ (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
+>;
+
+// i32 overload of the subtract form: extract sub0 of the 64-bit result.
+def : GCNPat <
+ (i32 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
+ (EXTRACT_SUBREG
+ (i64 (COPY_TO_REGCLASS
+ (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
+ VReg_64)),
+ sub0)
+>;
+
+
//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
+
+; Both RUN lines (SelectionDAG and GlobalISel) share one CHECK prefix, so the
+; two selectors are expected to emit identical code for both overloads of
+; llvm.amdgcn.ds.add.gs.reg.rtn.
+
+declare i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.ds.add.gs.reg.rtn.i64(i32, i32 immarg)
+
+; i32 overload, result unused: the instruction is still emitted for its side
+; effect and writes a 64-bit destination pair (v[0:1]).
+define amdgpu_gs void @test_add_32(i32 %arg) {
+; CHECK-LABEL: test_add_32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: ds_add_gs_reg_rtn v[0:1], v0 offset:16 gds
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: buffer_gl0_inv
+; CHECK-NEXT: buffer_gl1_inv
+; CHECK-NEXT: s_endpgm
+ %unused = call i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32 %arg, i32 16)
+ ret void
+}
+
+; i32 overload, result used: only the low 32 bits (v3) of the 64-bit
+; destination are stored.
+define amdgpu_gs void @test_add_32_use(i32 %arg, i32 addrspace(1)* %out) {
+; CHECK-LABEL: test_add_32_use:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: ds_add_gs_reg_rtn v[3:4], v0 offset:16 gds
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: buffer_gl0_inv
+; CHECK-NEXT: buffer_gl1_inv
+; CHECK-NEXT: global_store_b32 v[1:2], v3, off
+; CHECK-NEXT: s_endpgm
+ %res = call i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32 %arg, i32 16)
+ store i32 %res, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; i64 overload, result unused.
+define amdgpu_gs void @test_add_64(i32 %arg) {
+; CHECK-LABEL: test_add_64:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: ds_add_gs_reg_rtn v[0:1], v0 offset:32 gds
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: buffer_gl0_inv
+; CHECK-NEXT: buffer_gl1_inv
+; CHECK-NEXT: s_endpgm
+ %unused = call i64 @llvm.amdgcn.ds.add.gs.reg.rtn.i64(i32 %arg, i32 32)
+ ret void
+}
+
+; i64 overload, result used: the full 64-bit result (v[3:4]) is stored.
+define amdgpu_gs void @test_add_64_use(i32 %arg, i64 addrspace(1)* %out) {
+; CHECK-LABEL: test_add_64_use:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: ds_add_gs_reg_rtn v[3:4], v0 offset:32 gds
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: buffer_gl0_inv
+; CHECK-NEXT: buffer_gl1_inv
+; CHECK-NEXT: global_store_b64 v[1:2], v[3:4], off
+; CHECK-NEXT: s_endpgm
+ %res = call i64 @llvm.amdgcn.ds.add.gs.reg.rtn.i64(i32 %arg, i32 32)
+ store i64 %res, i64 addrspace(1)* %out, align 4
+ ret void
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
+
+; Both RUN lines (SelectionDAG and GlobalISel) share one CHECK prefix, so the
+; two selectors are expected to emit identical code for both overloads of
+; llvm.amdgcn.ds.sub.gs.reg.rtn.
+
+declare i32 @llvm.amdgcn.ds.sub.gs.reg.rtn.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.ds.sub.gs.reg.rtn.i64(i32, i32 immarg)
+
+; i32 overload, result unused: the instruction is still emitted for its side
+; effect and writes a 64-bit destination pair (v[0:1]).
+define amdgpu_gs void @test_sub_32(i32 %arg) {
+; CHECK-LABEL: test_sub_32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: ds_sub_gs_reg_rtn v[0:1], v0 offset:16 gds
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: buffer_gl0_inv
+; CHECK-NEXT: buffer_gl1_inv
+; CHECK-NEXT: s_endpgm
+ %unused = call i32 @llvm.amdgcn.ds.sub.gs.reg.rtn.i32(i32 %arg, i32 16)
+ ret void
+}
+
+; i32 overload, result used: only the low 32 bits (v3) of the 64-bit
+; destination are stored.
+define amdgpu_gs void @test_sub_32_use(i32 %arg, i32 addrspace(1)* %out) {
+; CHECK-LABEL: test_sub_32_use:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: ds_sub_gs_reg_rtn v[3:4], v0 offset:16 gds
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: buffer_gl0_inv
+; CHECK-NEXT: buffer_gl1_inv
+; CHECK-NEXT: global_store_b32 v[1:2], v3, off
+; CHECK-NEXT: s_endpgm
+ %res = call i32 @llvm.amdgcn.ds.sub.gs.reg.rtn.i32(i32 %arg, i32 16)
+ store i32 %res, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; i64 overload, result unused.
+define amdgpu_gs void @test_sub_64(i32 %arg) {
+; CHECK-LABEL: test_sub_64:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: ds_sub_gs_reg_rtn v[0:1], v0 offset:32 gds
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: buffer_gl0_inv
+; CHECK-NEXT: buffer_gl1_inv
+; CHECK-NEXT: s_endpgm
+ %unused = call i64 @llvm.amdgcn.ds.sub.gs.reg.rtn.i64(i32 %arg, i32 32)
+ ret void
+}
+
+; i64 overload, result used: the full 64-bit result (v[3:4]) is stored.
+define amdgpu_gs void @test_sub_64_use(i32 %arg, i64 addrspace(1)* %out) {
+; CHECK-LABEL: test_sub_64_use:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: ds_sub_gs_reg_rtn v[3:4], v0 offset:32 gds
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
+; CHECK-NEXT: buffer_gl0_inv
+; CHECK-NEXT: buffer_gl1_inv
+; CHECK-NEXT: global_store_b64 v[1:2], v[3:4], off
+; CHECK-NEXT: s_endpgm
+ %res = call i64 @llvm.amdgcn.ds.sub.gs.reg.rtn.i64(i32 %arg, i32 32)
+ store i64 %res, i64 addrspace(1)* %out, align 4
+ ret void
+}
+