From c463fd136ec259ec269ee6741763ce595811da71 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 19 Sep 2020 08:26:38 -0400 Subject: [PATCH] GlobalISel: Fix truncating shift amount in trunc (shl) combine The shift amount type does not necessarily match the result type. This was inserting a trunc from s32 to s32, which asserted. Just preserve the original shift amount type which can be legalized later. --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 3 +- .../CodeGen/AArch64/GlobalISel/combine-trunc.mir | 3 +- .../AMDGPU/GlobalISel/combine-trunc-shl.mir | 70 ++++++++++++++++++++++ 3 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shl.mir diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 938f559..65a71c4 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -2177,8 +2177,7 @@ bool CombinerHelper::applyCombineTruncOfShl( Register ShiftAmt = MatchInfo.second; Builder.setInstrAndDebugLoc(MI); auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc); - auto TruncShiftAmt = Builder.buildTrunc(DstTy, ShiftAmt); - Builder.buildShl(DstReg, TruncShiftSrc, TruncShiftAmt, SrcMI->getFlags()); + Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags()); MI.eraseFromParent(); return true; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir index eb1652c..9f1d403 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir @@ -114,8 +114,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s32) ; CHECK: $h0 = COPY [[SHL]](s16) %0:_(s32) = COPY $w0 %1:_(s32) = G_CONSTANT i32 2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shl.mir new file mode 100644 index 0000000..00cac80 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shl.mir @@ -0,0 +1,70 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: trunc_s32_shl_s64_5 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: trunc_s32_shl_s64_5 + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) + ; CHECK: $vgpr0 = COPY [[SHL]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s64) = G_SHL %0:_, %1 + %3:_(s32) = G_TRUNC %2 + $vgpr0 = COPY %3 +... + +--- +name: trunc_s16_shl_s32_5 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: trunc_s16_shl_s32_5 + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_SHL %0:_, %1 + %3:_(s16) = G_TRUNC %2 + S_ENDPGM 0, implicit %3 + +... + +--- +name: trunc_s16_shl_s64_5 +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: trunc_s16_shl_s64_5 + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s64) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s64) = G_SHL %0:_, %1 + %3:_(s16) = G_TRUNC %2 + S_ENDPGM 0, implicit %3 + +... -- 2.7.4