From 8b323f53eb4037e06c61f4b3479959056a4ca166 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Fri, 1 Feb 2019 19:53:44 +0000 Subject: [PATCH] [InstCombine] Extra null-checking on TFE/LWE support - If that operand is not ConstantInt, skip enabling TFE/LWE. Differential Revision: https://reviews.llvm.org/D57539 llvm-svn: 352904 --- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 7 +++---- .../Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll | 7 +++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index b2863b0..e62faab 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -975,12 +975,11 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II, return nullptr; // Need to change to new instruction format - ConstantInt *TFC = nullptr; bool TFELWEEnabled = false; if (TFCIdx > 0) { - TFC = dyn_cast<ConstantInt>(II->getArgOperand(TFCIdx)); - TFELWEEnabled = TFC->getZExtValue() & 0x1 // TFE - || TFC->getZExtValue() & 0x2; // LWE + if (ConstantInt *TFC = dyn_cast<ConstantInt>(II->getArgOperand(TFCIdx))) + TFELWEEnabled = TFC->getZExtValue() & 0x1 // TFE + || TFC->getZExtValue() & 0x2; // LWE } if (TFELWEEnabled) diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll index 75e8618..afac106 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll @@ -2395,7 +2395,14 @@ define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8 ret float %elt0 } +; Verify that we don't crash on non-constant operand. 
+define protected <4 x half> @__llvm_amdgcn_image_sample_d_1darray_v4f16_f32_f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 zeroext, i32, i32) local_unnamed_addr { + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32 %0, float %1, float %2, float %3, float %4, <8 x i32> %5, <4 x i32> %6, i1 zeroext %7, i32 %8, i32 %9) #1 + ret <4 x half> %tmp +} + declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) attributes #0 = { nounwind } attributes #1 = { nounwind readonly } -- 2.7.4