From 06f8d394242dcae10cf5f5fb6993658b80a8cabe Mon Sep 17 00:00:00 2001 From: Wei Ding Date: Fri, 9 Sep 2016 19:31:51 +0000 Subject: [PATCH] AMDGPU : Fix mqsad_u32_u8 instruction incorrect data type. Differential Revision: http://reviews.llvm.org/D23700 llvm-svn: 281081 --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/CIInstructions.td | 2 +- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 22 +++++++---- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 2 + .../CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll | 46 +++++++++++++++++----- llvm/test/MC/AMDGPU/vop3-errs.s | 3 ++ llvm/test/MC/AMDGPU/vop3.s | 11 +++++- 7 files changed, 66 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index c9286ee..bb62299 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -538,7 +538,7 @@ def int_amdgcn_mqsad_pk_u16_u8 : def int_amdgcn_mqsad_u32_u8 : GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_amdgcn_cvt_pk_u8_f32 : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">, diff --git a/llvm/lib/Target/AMDGPU/CIInstructions.td b/llvm/lib/Target/AMDGPU/CIInstructions.td index 07b7477..8b963a2 100644 --- a/llvm/lib/Target/AMDGPU/CIInstructions.td +++ b/llvm/lib/Target/AMDGPU/CIInstructions.td @@ -58,7 +58,7 @@ defm V_QSAD_PK_U16_U8 : VOP3Inst , "v_qsad_pk_u16_u8", VOP_I64_I64_I32_I64, int_amdgcn_qsad_pk_u16_u8>; defm V_MQSAD_U32_U8 : VOP3Inst , "v_mqsad_u32_u8", - VOP_I32_I32_I32_I32, int_amdgcn_mqsad_u32_u8>; + VOP_V4I32_I64_I32_V4I32, int_amdgcn_mqsad_u32_u8>; let isCommutable = 1 in { defm V_MAD_U64_U32 : VOP3Inst , "v_mad_u64_u32", diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index f68eef2..f706124 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -624,9 +624,10 @@ class getNumSrcArgs { // instructions for the given VT. class getVALUDstForVT { RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand, - !if(!eq(VT.Size, 64), VOPDstOperand, - !if(!eq(VT.Size, 16), VOPDstOperand, - VOPDstOperand))); // else VT == i1 + !if(!eq(VT.Size, 128), VOPDstOperand, + !if(!eq(VT.Size, 64), VOPDstOperand, + !if(!eq(VT.Size, 16), VOPDstOperand, + VOPDstOperand)))); // else VT == i1 } // Returns the register class to use for source 0 of VOP[12C] @@ -636,14 +637,15 @@ class getVOPSrc0ForVT { !if(!eq(VT.Value, f32.Value), 1, !if(!eq(VT.Value, f64.Value), 1, 0))); - RegisterOperand ret = !if(isFP, + RegisterOperand ret = !if(isFP, !if(!eq(VT.Size, 64), VSrc_f64, VSrc_f32), !if(!eq(VT.Size, 64), VSrc_b64, VSrc_b32)); } // Returns the vreg register class to use for source operand given VT class getVregSrcForVT { - RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32); + RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128, + !if(!eq(VT.Size, 64), VReg_64, VGPR_32)); } @@ -655,6 +657,8 @@ class getVOP3SrcForVT { !if(!eq(VT.Value, f64.Value), 1, 0))); RegisterOperand ret = + !if(!eq(VT.Size, 128), + VSrc_128, !if(!eq(VT.Size, 64), !if(isFP, VCSrc_f64, @@ -665,7 +669,8 @@ class getVOP3SrcForVT { VCSrc_f32, VCSrc_b32) ) - ); + ) + ); } // Returns 1 if the source arguments have modifiers, 0 if they do not. @@ -779,7 +784,7 @@ class getInsSDWA ; def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>; def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>; +def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>; // This class is used only with VOPC instructions. Use $sdst for out operand class SIInstAlias ; defm VSrc : RegImmOperand<"VS", "VSrc">; +def VSrc_128 : RegisterOperand; + //===----------------------------------------------------------------------===// // VSrc_* Operands with an VGPR //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll index d6dac62..04bb97a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll @@ -1,21 +1,47 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -declare i32 @llvm.amdgcn.mqsad.u32.u8(i32, i32, i32) #0 +declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0 -; GCN-LABEL: {{^}}v_mqsad_u32_u8: -; GCN: v_mqsad_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define void @v_mqsad_u32_u8(i32 addrspace(1)* %out, i32 %src) { - %result= call i32 @llvm.amdgcn.mqsad.u32.u8(i32 %src, i32 100, i32 100) #0 - store i32 %result, i32 addrspace(1)* %out, align 4 +; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_non_inline_constant: +; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +define void @v_mqsad_u32_u8_use_non_inline_constant(<4 x i32> addrspace(1)* %out, i64 %src) { + %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 100, <4 x i32> ) #0 + store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate: -; GCN: v_mqsad_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define void @v_mqsad_u32_u8_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) { - %result= call i32 @llvm.amdgcn.mqsad.u32.u8(i32 %src, i32 %a, i32 %b) #0 - store i32 %result, i32 addrspace(1)* %out, align 4 +; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +define void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> %b) { + %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %b) #0 + store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate: +; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +define void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { + %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> ) #0 + store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_fp_immediate: +; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +define void @v_mqsad_u32_u8_inline_fp_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { + %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> ) #0 + store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_sgpr_vgpr: +; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +define void @v_mqsad_u32_u8_use_sgpr_vgpr(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> addrspace(1)* %input) { + %in = load <4 x i32>, <4 x i32> addrspace(1) * %input + + %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %in) #0 + store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 ret void } diff --git a/llvm/test/MC/AMDGPU/vop3-errs.s b/llvm/test/MC/AMDGPU/vop3-errs.s index 45ad748..9e83214 100644 --- a/llvm/test/MC/AMDGPU/vop3-errs.s +++ b/llvm/test/MC/AMDGPU/vop3-errs.s @@ -6,3 +6,6 @@ v_add_f32_e64 v0, v1 v_div_scale_f32 v24, vcc, v22, 1.1, v22 // CHECK: error: invalid operand for instruction + +v_mqsad_u32_u8 v[0:3], s[2:3], v4, v[0:3] +// CHECK: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/vop3.s b/llvm/test/MC/AMDGPU/vop3.s index 8636287..b7d7203 100644 --- a/llvm/test/MC/AMDGPU/vop3.s +++ b/llvm/test/MC/AMDGPU/vop3.s @@ -1,5 +1,7 @@ -// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI -// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=SICI +// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI +// RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI + +// RUN: llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck %s --check-prefix=CI // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI @@ -357,3 +359,8 @@ v_div_scale_f32 v24, vcc, v22, v22, 0xc0000000 v_mad_f32 v9, 0.5, v5, -v8 // SICI: v_mad_f32 v9, 0.5, v5, -v8 ; encoding: [0x09,0x00,0x82,0xd2,0xf0,0x0a,0x22,0x84] // VI: v_mad_f32 v9, 0.5, v5, -v8 ; encoding: [0x09,0x00,0xc1,0xd1,0xf0,0x0a,0x22,0x84] + +v_mqsad_u32_u8 v[0:3], s[2:3], v4, v[0:3] +// CI: v_mqsad_u32_u8 v[0:3], s[2:3], v4, v[0:3] ; encoding: [0x00,0x00,0xe8,0xd2,0x02,0x08,0x02,0x04] +// VI: v_mqsad_u32_u8 v[0:3], s[2:3], v4, v[0:3] ; encoding: [0x00,0x00,0xe7,0xd1,0x02,0x08,0x02,0x04] +// NOSI: error: instruction not supported on this GPU -- 2.7.4