From: Matt Arsenault
Date: Mon, 8 May 2023 13:34:03 +0000 (+0100)
Subject: GlobalISel: Fold out G_FPTRUNC(G_FPEXT)
X-Git-Tag: upstream/17.0.6~8928
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=62eac3e06891937a648014ccd49d55c0072bb92e;p=platform%2Fupstream%2Fllvm.git

GlobalISel: Fold out G_FPTRUNC(G_FPEXT)
---

Review notes (annotation only; the hunks below are not altered):

* Combine.td, first hunk: adds the rule fptrunc_fpext_fold. It matches a
  G_FPTRUNC ($dst) whose input ($src1) is produced by a G_FPEXT of $src0.
  The C++ predicate only accepts the match when the type of $src0 equals
  the type of $dst; the apply step then calls
  Helper.replaceSingleDefInstWithReg on the G_FPTRUNC with the $src0
  register. The predicate and apply text are the same as the
  bitcast_bitcast_fold lines visible as hunk context.
* Combine.td, second hunk: appends fptrunc_fpext_fold to the
  identity_combines group, after bitcast_bitcast_fold.
* prelegalizer-combiner-fptrunc_fpext.mir: new test run through
  -run-pass=amdgpu-prelegalizer-combiner on gfx900. Cases in this part:
    s16 -> s32 -> s16      expected output is a direct copy of %reg; the
                           G_FPEXT/G_FPTRUNC pair is gone, and so are the
                           surrounding G_TRUNC/G_ANYEXT (presumably removed
                           by other combines; not shown in this patch).
    s16 -> s64 -> s32      result type differs from the source type, and
                           the expected output keeps both instructions.
    v2s16 -> v2s32 -> v2s16  vector form of the first case; expected output
                           copies %src straight through.
  A fourth document (v2s16 -> v2s64 -> v2s32) follows at the end of the
  file.

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 0c704831..40ccfb0 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1036,6 +1036,14 @@ def bitcast_bitcast_fold : GICombineRule<
       [{ return MRI.getType(${src0}.getReg()) == MRI.getType(${dst}.getReg()); }]),
   (apply [{ Helper.replaceSingleDefInstWithReg(*${op}, ${src0}.getReg()); }])>;
 
+
+def fptrunc_fpext_fold : GICombineRule<
+  (defs root:$dst),
+  (match (G_FPTRUNC $dst, $src1):$op, (G_FPEXT $src1, $src0),
+      [{ return MRI.getType(${src0}.getReg()) == MRI.getType(${dst}.getReg()); }]),
+  (apply [{ Helper.replaceSingleDefInstWithReg(*${op}, ${src0}.getReg()); }])>;
+
+
 def select_to_minmax: GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
   (match (wip_match_opcode G_SELECT):$root,
@@ -1063,7 +1071,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                         add_sub_reg, buildvector_identity_fold,
                                         trunc_buildvector_fold,
                                         trunc_lshr_buildvector_fold,
-                                        bitcast_bitcast_fold]>;
+                                        bitcast_bitcast_fold, fptrunc_fpext_fold]>;
 
 def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
                                      overlapping_and, mulo_by_2, mulo_by_0,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-fptrunc_fpext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-fptrunc_fpext.mir
new file mode 100644
index 0000000..b6434c6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-fptrunc_fpext.mir
@@ -0,0 +1,78 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -o - %s | FileCheck %s
+
+---
+name: fptrunc_fpext_s16_to_s32_to_s16
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: fptrunc_fpext_s16_to_s32_to_s16
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: $vgpr0 = COPY %reg(s32)
+    %reg:_(s32) = COPY $vgpr0
+    %src:_(s16) = G_TRUNC %reg
+    %fpext:_(s32) = G_FPEXT %src
+    %fptrunc:_(s16) = G_FPTRUNC %fpext
+    %anyext:_(s32) = G_ANYEXT %fptrunc
+    $vgpr0 = COPY %anyext
+...
+
+---
+name: fptrunc_fpext_s16_to_s64_to_s32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: fptrunc_fpext_s16_to_s64_to_s32
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %reg:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %src:_(s16) = G_TRUNC %reg(s32)
+    ; CHECK-NEXT: %fpext:_(s64) = G_FPEXT %src(s16)
+    ; CHECK-NEXT: %fptrunc:_(s32) = G_FPTRUNC %fpext(s64)
+    ; CHECK-NEXT: $vgpr0 = COPY %fptrunc(s32)
+    %reg:_(s32) = COPY $vgpr0
+    %src:_(s16) = G_TRUNC %reg
+    %fpext:_(s64) = G_FPEXT %src
+    %fptrunc:_(s32) = G_FPTRUNC %fpext
+    $vgpr0 = COPY %fptrunc
+...
+
+---
+name: fptrunc_fpext_v2s16_to_v2s32_to_v2s16
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: fptrunc_fpext_v2s16_to_v2s32_to_v2s16
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK-NEXT: $vgpr0 = COPY %src(<2 x s16>)
+    %src:_(<2 x s16>) = COPY $vgpr0
+    %fpext:_(<2 x s32>) = G_FPEXT %src
+    %fptrunc:_(<2 x s16>) = G_FPTRUNC %fpext
+    $vgpr0 = COPY %fptrunc
+...
+
+---
+name: fptrunc_fpext_v2s16_to_v2s64_to_v2s32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: fptrunc_fpext_v2s16_to_v2s64_to_v2s32
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %src:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK-NEXT: %fpext:_(<2 x s64>) = G_FPEXT %src(<2 x s16>)
+    ; CHECK-NEXT: %fptrunc:_(<2 x s32>) = G_FPTRUNC %fpext(<2 x s64>)
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %fptrunc(<2 x s32>)
+    %src:_(<2 x s16>) = COPY $vgpr0
+    %fpext:_(<2 x s64>) = G_FPEXT %src
+    %fptrunc:_(<2 x s32>) = G_FPTRUNC %fpext ; negative case: result is <2 x s32> but %src is <2 x s16>, and the expected output above keeps both instructions
+    $vgpr0_vgpr1 = COPY %fptrunc
+...