From e97aa5609fa53827c8c09a0fc79075dc834f292d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 26 Jul 2020 23:01:28 -0400 Subject: [PATCH] AMDGPU/GlobalISel: Don't assert in LegalizerInfo constructor We don't really need these asserts. The LegalizerInfo is also overly-aggressivly constructed, even when not in use. It needs to not assert on dummy targets that have manually specified, unrelated features. --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 3 - .../test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll | 91 ++++++++++++++++++++++ 2 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 673c5fc..f1962db 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -423,7 +423,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0); if (ST.hasVOP3PInsts()) { - assert(ST.hasIntClamp() && "all targets with VOP3P should support clamp"); getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) .legalFor({S32, S16, V2S16}) .clampScalar(0, S16, S32) @@ -445,8 +444,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0) .widenScalarToNextPow2(0, 32); // FIXME: min should be 16 - assert(ST.hasIntClamp() && "all targets with 16-bit should support clamp"); - // Technically the saturating operations require clamp bit support, but this // was introduced at the same time as 16-bit operations. getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT}) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll new file mode 100644 index 0000000..81f6b8e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -stop-after=legalizer -o - %s | FileCheck %s + +; Make sure legalizer info doesn't assert on dummy targets + +define i16 @vop3p_add_i16(i16 %arg0) #0 { + ; CHECK-LABEL: name: vop3p_add_i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %add = add i16 %arg0, %arg0 + ret i16 %add +} + +define <2 x i16> @vop3p_add_v2i16(<2 x i16> %arg0) #0 { + ; CHECK-LABEL: name: vop3p_add_v2i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %add = add <2 x i16> %arg0, %arg0 + ret <2 x i16> %add +} + +define i16 @halfinsts_add_i16(i16 %arg0) #1 { + ; CHECK-LABEL: name: halfinsts_add_i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; CHECK: $vgpr0 = COPY [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + %add = add i16 %arg0, %arg0 + ret i16 %add +} + +define <2 x i16> @halfinsts_add_v2i16(<2 x i16> %arg0) #1 { + ; CHECK-LABEL: name: halfinsts_add_v2i16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[COPY4]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY5]], [[COPY6]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD1]](s32) + ; CHECK: $vgpr0 = COPY [[COPY7]](s32) + ; CHECK: $vgpr1 = COPY [[COPY8]](s32) + ; CHECK: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY9]], implicit $vgpr0, implicit $vgpr1 + %add = add <2 x i16> %arg0, %arg0 + ret <2 x i16> %add +} + +attributes #0 = { "target-features"="+vop3p" } +attributes #0 = { "target-features"="+16-bit-insts" } -- 2.7.4