From: Matt Arsenault Date: Wed, 30 Jan 2019 03:36:25 +0000 (+0000) Subject: AMDGPU/GlobalISel: Fix clamping shifts with 16-bit insts X-Git-Tag: llvmorg-10-init~13309 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f6cab162583c98838807cf020e3fbc99514c3c38;p=platform%2Fupstream%2Fllvm.git AMDGPU/GlobalISel: Fix clamping shifts with 16-bit insts llvm-svn: 352599 --- diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 434fcc5..0da4607 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -320,9 +320,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, // be more flexible with the shift amount type. auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR}) .legalFor({{S32, S32}, {S64, S32}}); - if (ST.has16BitInsts()) + if (ST.has16BitInsts()) { Shifts.legalFor({{S16, S32}, {S16, S16}}); - else + Shifts.clampScalar(0, S16, S64); + } else Shifts.clampScalar(0, S32, S64); Shifts.clampScalar(1, S32, S32); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir index 814891d..eb1cc74 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -250,3 +250,59 @@ body: | %5:_(s32) = G_ANYEXT %4 $vgpr0 = COPY %5 ... + +--- +name: test_ashr_i8_i8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; SI-LABEL: name: test_ashr_i8_i8 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[AND]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32) + ; SI: $vgpr0 = COPY [[COPY4]](s32) + ; VI-LABEL: name: test_ashr_i8_i8 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16) + ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC]](s16) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s32) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: test_ashr_i8_i8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16) + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC]](s16) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s8) = G_TRUNC %0 + %3:_(s8) = G_TRUNC %1 + %4:_(s8) = G_ASHR %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir index d0999fc..15d5084 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -247,3 +247,58 @@ body: | %5:_(s32) = G_ANYEXT %4 $vgpr0 = COPY %5 ... + +--- +name: test_lshr_i8_i8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; SI-LABEL: name: test_lshr_i8_i8 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[AND1]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: $vgpr0 = COPY [[COPY4]](s32) + ; VI-LABEL: name: test_lshr_i8_i8 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND1]](s32) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: test_lshr_i8_i8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND1]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s8) = G_TRUNC %0 + %3:_(s8) = G_TRUNC %1 + %4:_(s8) = G_LSHR %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir index 63b0476..fb94ab8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -265,3 +265,18 @@ body: | %5:_(s32) = G_ANYEXT %4 $vgpr0 = COPY %5 ... + +--- +name: test_shl_i8_i8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s8) = G_TRUNC %0 + %3:_(s8) = G_TRUNC %1 + %4:_(s8) = G_SHL %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +...