TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp")
TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nc", "dpp")
TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "gfx8-insts")
+TARGET_BUILTIN(__builtin_amdgcn_perm, "UiUiUiUi", "nc", "gfx8-insts")
//===----------------------------------------------------------------------===//
// GFX9+ only builtins.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
typedef unsigned long ulong;
+typedef unsigned int uint;
// CHECK-LABEL: @test_div_fixup_f16
// CHECK: call half @llvm.amdgcn.div.fixup.f16
{
*out = __builtin_amdgcn_s_memtime();
}
+
+// CHECK-LABEL: @test_perm
+// CHECK: call i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
+void test_perm(global uint* out, uint a, uint b, uint s)
+{
+ *out = __builtin_amdgcn_perm(a, b, s);
+}
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu hawaii -verify -S -o - %s
-void test_vi_s_dcache_wb()
+void test_vi_builtins()
{
__builtin_amdgcn_s_dcache_wb(); // expected-error {{'__builtin_amdgcn_s_dcache_wb' needs target feature gfx8-insts}}
+ (void)__builtin_amdgcn_perm(1, 2, 3); // expected-error {{'__builtin_amdgcn_perm' needs target feature gfx8-insts}}
}
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn]>;
+// llvm.amdgcn.perm <src0> <src1> <selector>
+def int_amdgcn_perm :
+ GCCBuiltin<"__builtin_amdgcn_perm">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+
//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
//===----------------------------------------------------------------------===//
SDTCisInt<4>]>,
[]>;
-def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
+def AMDGPUperm_impl : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
// SI+ export
def AMDGPUExportOp : SDTypeProfile<0, 8, [
def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$vcc),
[(int_amdgcn_div_fmas node:$src0, node:$src1, node:$src2, node:$vcc),
(AMDGPUdiv_fmas_impl node:$src0, node:$src1, node:$src2, node:$vcc)]>;
+
+def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+ [(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
+ (AMDGPUperm_impl node:$src0, node:$src1, node:$src2)]>;
case Intrinsic::amdgcn_cvt_pk_u8_f32:
case Intrinsic::amdgcn_alignbit:
case Intrinsic::amdgcn_alignbyte:
+ case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_fdot2:
case Intrinsic::amdgcn_sdot2:
case Intrinsic::amdgcn_udot2:
case Intrinsic::amdgcn_alignbit:
return DAG.getNode(ISD::FSHR, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::amdgcn_perm:
+ return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_reloc_constant: {
Module *M = const_cast<Module *>(MF.getFunction().getParent());
const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
--- /dev/null
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -global-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i32 @llvm.amdgcn.perm(i32, i32, i32) #0
+
+; GCN-LABEL: {{^}}v_perm_b32_v_v_v:
+; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, v2
+define amdgpu_ps void @v_perm_b32_v_v_v(i32 %src1, i32 %src2, i32 %src3, i32 addrspace(1)* %out) #1 {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 %src3) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_v_v_c:
+; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, {{[vs][0-9]+}}
+define amdgpu_ps void @v_perm_b32_v_v_c(i32 %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_s_v_c:
+; GCN: v_perm_b32 v{{[0-9]+}}, s0, v0, v{{[0-9]+}}
+define amdgpu_ps void @v_perm_b32_s_v_c(i32 inreg %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_s_s_c:
+; GCN: v_perm_b32 v{{[0-9]+}}, s0, v{{[0-9]+}}, v{{[0-9]+}}
+define amdgpu_ps void @v_perm_b32_s_s_c(i32 inreg %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_v_s_i:
+; GCN: v_perm_b32 v{{[0-9]+}}, v0, s0, 1
+define amdgpu_ps void @v_perm_b32_v_s_i(i32 %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
+ %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 1) #0
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }