From 191507e0b730e95321d2b4126e8a0220e013bdcd Mon Sep 17 00:00:00 2001 From: Marek Olsak Date: Tue, 3 Feb 2015 17:38:12 +0000 Subject: [PATCH] R600/SI: Don't generate non-existent LSHL, LSHR, ASHR B32 variants on VI MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This can happen when a REV instruction is commuted. The trick is not to define the _vi versions of instructions, which has these consequences: - code generation will always fail if a pseudo cannot be lowered (very useful to catch bugs where an unsupported instruction somehow makes it to the printer) - ability to query if a pseudo can be lowered, which is done in commuteOpcode to prevent REV from commuting to non-REV on VI Tested-by: Michel Dänzer llvm-svn: 227990 --- llvm/lib/Target/R600/SIInstrInfo.cpp | 8 ++++++-- llvm/lib/Target/R600/SIInstrInfo.td | 34 ++++++++++++++++++++++++++++++---- llvm/lib/Target/R600/SIInstructions.td | 10 +++++----- llvm/test/CodeGen/R600/shl.ll | 25 ++++++++++++++++++++++++- llvm/test/CodeGen/R600/sra.ll | 30 +++++++++++++++++++++++++++++- 5 files changed, 94 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp index 485c4ab..3c863eb 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.cpp +++ b/llvm/lib/Target/R600/SIInstrInfo.cpp @@ -407,11 +407,15 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { int NewOpc; // Try to map original to commuted opcode - if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1) + NewOpc = AMDGPU::getCommuteRev(Opcode); + // Check if the commuted (REV) opcode exists on the target. + if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1) return NewOpc; // Try to map commuted to original opcode - if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1) + NewOpc = AMDGPU::getCommuteOrig(Opcode); + // Check if the original (non-REV) opcode exists on the target. + if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1) return NewOpc; return Opcode; diff --git a/llvm/lib/Target/R600/SIInstrInfo.td b/llvm/lib/Target/R600/SIInstrInfo.td index a8f73392..3159f9f 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.td +++ b/llvm/lib/Target/R600/SIInstrInfo.td @@ -944,13 +944,24 @@ multiclass VOP3_2_m , VOP2_REV; - def _si : VOP3_Real_si , + def _si : VOP3_Real_si , VOP3DisableFields<1, 0, HasMods>; - def _vi : VOP3_Real_vi , + def _vi : VOP3_Real_vi , + VOP3DisableFields<1, 0, HasMods>; +} + +multiclass VOP3SI_2_m pattern, string opName, string revOp, + bit HasMods = 1, bit UseFullOp = 0> { + + def "" : VOP3_Pseudo , + VOP2_REV; + + def _si : VOP3_Real_si , VOP3DisableFields<1, 0, HasMods>; + + // No VI instruction. This class is for SI only. } multiclass VOP3b_2_m ; +multiclass VOP2InstSI { + defm _e32 : VOP2SI_m ; + + defm _e64 : VOP3SI_2_m ; +} + multiclass VOP2b_Helper pat32, dag ins64, string asm64, list pat64, diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index 75776c9..39c3fd0 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -1540,21 +1540,21 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m < // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { -defm V_MIN_LEGACY_F32 : VOP2Inst , "v_min_legacy_f32", +defm V_MIN_LEGACY_F32 : VOP2InstSI , "v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy >; -defm V_MAX_LEGACY_F32 : VOP2Inst , "v_max_legacy_f32", +defm V_MAX_LEGACY_F32 : VOP2InstSI , "v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy >; let isCommutable = 1 in { -defm V_LSHR_B32 : VOP2Inst , "v_lshr_b32", VOP_I32_I32_I32, srl>; -defm V_ASHR_I32 : VOP2Inst , "v_ashr_i32", +defm V_LSHR_B32 : VOP2InstSI , "v_lshr_b32", VOP_I32_I32_I32, srl>; +defm V_ASHR_I32 : VOP2InstSI , "v_ashr_i32", VOP_I32_I32_I32, sra >; let hasPostISelHook = 1 in { -defm V_LSHL_B32 : VOP2Inst , "v_lshl_b32", VOP_I32_I32_I32, shl>; +defm V_LSHL_B32 : VOP2InstSI , "v_lshl_b32", VOP_I32_I32_I32, shl>; } } // End isCommutable = 1 diff --git a/llvm/test/CodeGen/R600/shl.ll b/llvm/test/CodeGen/R600/shl.ll index 75341a2..ff2f096 100644 --- a/llvm/test/CodeGen/R600/shl.ll +++ b/llvm/test/CodeGen/R600/shl.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI-CHECK %s ;EG-CHECK: {{^}}shl_v2i32: ;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -10,6 +10,10 @@ ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: {{^}}shl_v2i32: +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 %a = load <2 x i32> addrspace(1) * %in @@ -31,6 +35,12 @@ define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: {{^}}shl_v4i32: +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 %a = load <4 x i32> addrspace(1) * %in @@ -55,6 +65,9 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ;SI-CHECK: {{^}}shl_i64: ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: {{^}}shl_i64: +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 %a = load i64 addrspace(1) * %in @@ -90,6 +103,10 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: {{^}}shl_v2i64: +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 %a = load <2 x i64> addrspace(1) * %in @@ -147,6 +164,12 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: {{^}}shl_v4i64: +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 %a = load <4 x i64> addrspace(1) * %in diff --git a/llvm/test/CodeGen/R600/sra.ll b/llvm/test/CodeGen/R600/sra.ll index f062e4c..44c1101 100644 --- a/llvm/test/CodeGen/R600/sra.ll +++ b/llvm/test/CodeGen/R600/sra.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI-CHECK %s ;EG-CHECK-LABEL: {{^}}ashr_v2i32: ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -10,6 +10,10 @@ ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK-LABEL: {{^}}ashr_v2i32: +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1 %a = load <2 x i32> addrspace(1) * %in @@ -31,6 +35,12 @@ define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK-LABEL: {{^}}ashr_v4i32: +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 %a = load <4 x i32> addrspace(1) * %in @@ -45,6 +55,10 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i ;SI-CHECK-LABEL: {{^}}ashr_i64: ;SI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8 + +;VI-CHECK-LABEL: {{^}}ashr_i64: +;VI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8 + define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) { entry: %0 = sext i32 %in to i64 @@ -69,6 +83,10 @@ entry: ;SI-CHECK-LABEL: {{^}}ashr_i64_2: ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + +;VI-CHECK-LABEL: {{^}}ashr_i64_2: +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { entry: %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1 @@ -109,6 +127,10 @@ entry: ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK-LABEL: {{^}}ashr_v2i64: +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1 %a = load <2 x i64> addrspace(1) * %in @@ -174,6 +196,12 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK-LABEL: {{^}}ashr_v4i64: +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} +;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}} + define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1 %a = load <4 x i64> addrspace(1) * %in -- 2.7.4