From fa3e840d3d7d14fe131b0df0db359025b9446b9e Mon Sep 17 00:00:00 2001 From: Thomas Symalla Date: Mon, 25 Jan 2021 15:20:24 +0100 Subject: [PATCH] Removed the generic virtual register creations. Reworked the tests. --- .../Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp | 36 +++++-------------- .../AMDGPU/GlobalISel/combine-short-clamp.ll | 40 +++++++++++----------- 2 files changed, 29 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp index 7cd368b..969be8f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp @@ -124,47 +124,29 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16( B.setInstrAndDebugLoc(MI); auto Unmerge = B.buildUnmerge(S32, Src); - Register Hi32 = Unmerge.getReg(0); - Register Lo32 = Unmerge.getReg(1); - MRI.setRegClass(Hi32, &AMDGPU::VGPR_32RegClass); - MRI.setRegClass(Lo32, &AMDGPU::VGPR_32RegClass); assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32); - Register CvtDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); const LLT V2S16 = LLT::vector(2, 16); - MRI.setType(CvtDst, V2S16); - - B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, - {CvtDst}, - {Hi32, Lo32}, + auto CvtPk = B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, + {V2S16}, + {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags()); auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2); auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2); - auto MinBoundaryDst = B.buildConstant(S32, MinBoundary); - MRI.setRegClass(MinBoundaryDst.getReg(0), &AMDGPU::VGPR_32RegClass); - auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary); - MRI.setRegClass(MaxBoundaryDst.getReg(0), &AMDGPU::VGPR_32RegClass); - - Register MedDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - MRI.setType(MedDst, S32); - - Register CvtDst32 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - MRI.setType(CvtDst32, S32); - B.buildBitcast(CvtDst32, CvtDst); + auto Bitcast = B.buildBitcast({S32}, CvtPk); - B.buildInstr(AMDGPU::G_AMDGPU_MED3_S32, - {MedDst}, - {MinBoundaryDst.getReg(0), CvtDst32, MaxBoundaryDst.getReg(0)}, + auto Med3 = B.buildInstr(AMDGPU::G_AMDGPU_MED3_S32, + {S32}, + {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)}, MI.getFlags()); - Register TruncDst = MRI.createGenericVirtualRegister(LLT::scalar(16)); - B.buildTrunc(TruncDst, MedDst); - B.buildCopy(MI.getOperand(0).getReg(), TruncDst); + auto Trunc = B.buildTrunc(LLT::scalar(16), Med3); + B.buildCopy(MI.getOperand(0).getReg(), Trunc); MI.eraseFromParent(); } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll index 90d4735..7d74c60 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll @@ -8,12 +8,12 @@ declare i64 @llvm.smin.i64(i64, i64) ; GFX10-LABEL: {{^}}v_clamp_i64_i16 ; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] ; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] -; GFX6789: v_mov_b32_e32 [[B]], 0x7fff -; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0xffff8000 -; GFX6789: v_med3_i32 [[A]], [[C]], [[A]], [[B]] +; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000 +; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff +; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]] ; GFX10: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] -; GFX10: v_mov_b32_e32 [[C:v[0-9]+]], 0xffff8000 -; GFX10: v_med3_i32 [[A]], [[C]], [[A]], 0x7fff +; GFX10: v_mov_b32_e32 [[B]], 0x7fff +; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]] define i16 @v_clamp_i64_i16(i64 %in) #0 { entry: %max = call i64 @llvm.smax.i64(i64 %in, i64 -32768) @@ -25,12 +25,12 @@ entry: ; GFX10-LABEL: {{^}}v_clamp_i64_i16_reverse ; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] ; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] -; GFX6789: v_mov_b32_e32 [[B]], 0x7fff -; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0xffff8000 -; GFX6789: v_med3_i32 [[A]], [[C]], [[A]], [[B]] +; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000 +; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff +; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]] ; GFX10: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] -; GFX10: v_mov_b32_e32 [[C:v[0-9]+]], 0xffff8000 -; GFX10: v_med3_i32 [[A]], [[C]], [[A]], 0x7fff +; GFX10: v_mov_b32_e32 [[B]], 0x7fff +; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]] define i16 @v_clamp_i64_i16_reverse(i64 %in) #0 { entry: %min = call i64 @llvm.smin.i64(i64 %in, i64 32767) @@ -69,12 +69,12 @@ entry: ; GFX10-LABEL: {{^}}v_clamp_i64_i16_lower_than_short ; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] ; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] -; GFX6789: v_mov_b32_e32 [[B]], 0x100 -; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0xffffff01 -; GFX6789: v_med3_i32 [[A]], [[C]], [[A]], [[B]] +; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01 +; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100 +; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]] ; GFX10: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] -; GFX10: v_mov_b32_e32 [[C:v[0-9]+]], 0xffffff01 -; GFX10: v_med3_i32 [[A]], [[C]], [[A]], 0x100 +; GFX10: v_mov_b32_e32 [[B]], 0x100 +; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]] define i16 @v_clamp_i64_i16_lower_than_short(i64 %in) #0 { entry: %min = call i64 @llvm.smin.i64(i64 %in, i64 256) @@ -86,12 +86,12 @@ entry: ; GFX10-LABEL: {{^}}v_clamp_i64_i16_lower_than_short_reverse ; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] ; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] -; GFX6789: v_mov_b32_e32 [[B]], 0x100 -; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0xffffff01 -; GFX6789: v_med3_i32 [[A]], [[C]], [[A]], [[B]] +; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01 +; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100 +; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]] ; GFX10: v_cvt_pk_i16_i32_e64 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]] -; GFX10: v_mov_b32_e32 [[C:v[0-9]+]], 0xffffff01 -; GFX10: v_med3_i32 [[A]], [[C]], [[A]], 0x100 +; GFX10: v_mov_b32_e32 [[B]], 0x100 +; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]] define i16 @v_clamp_i64_i16_lower_than_short_reverse(i64 %in) #0 { entry: %max = call i64 @llvm.smax.i64(i64 %in, i64 -255) -- 2.7.4