From 4f64ade04cb241e0d7b292321ec7e89ebadb71b2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 1 Jul 2019 15:18:56 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Select src modifiers llvm-svn: 364782 --- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 46 ++++- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 3 + .../GlobalISel/inst-select-fcanonicalize.mir | 31 ++-- .../CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir | 190 +++++++++++++++++++-- 4 files changed, 234 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index ece01b5..3e7cd2d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1045,6 +1045,26 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const { } +std::pair<Register, unsigned> +AMDGPUInstructionSelector::selectVOP3ModsImpl( + Register Src, const MachineRegisterInfo &MRI) const { + unsigned Mods = 0; + MachineInstr *MI = MRI.getVRegDef(Src); + + if (MI && MI->getOpcode() == AMDGPU::G_FNEG) { + Src = MI->getOperand(1).getReg(); + Mods |= SISrcMods::NEG; + MI = MRI.getVRegDef(Src); + } + + if (MI && MI->getOpcode() == AMDGPU::G_FABS) { + Src = MI->getOperand(1).getReg(); + Mods |= SISrcMods::ABS; + } + + return std::make_pair(Src, Mods); +} + /// /// This will select either an SGPR or VGPR operand and will save us from /// having to write an extra tablegen pattern. 
@@ -1057,11 +1077,18 @@ AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const { InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const { + MachineRegisterInfo &MRI + = Root.getParent()->getParent()->getParent()->getRegInfo(); + + Register Src; + unsigned Mods; + std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); + return {{ - [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods - [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp - [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod }}; } InstructionSelector::ComplexRendererFns @@ -1075,9 +1102,16 @@ AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const { InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const { + MachineRegisterInfo &MRI + = Root.getParent()->getParent()->getParent()->getRegInfo(); + + Register Src; + unsigned Mods; + std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); + return {{ - [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods }}; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 997198e..0631183 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -85,6 +85,9 @@ private: bool selectG_SELECT(MachineInstr &I) const; bool selectG_STORE(MachineInstr &I) const; + std::pair<Register, unsigned> + selectVOP3ModsImpl(Register 
Src, const MachineRegisterInfo &MRI) const; + InstructionSelector::ComplexRendererFns selectVCSRC(MachineOperand &Root) const; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir index 45489b1..058e540 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir @@ -101,15 +101,13 @@ body: | bb.0: liveins: $vgpr0 ; GFX9-DENORM-LABEL: name: fcanonicalize_fabs_f32 - ; GFX9-DENORM: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-DENORM: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; GFX9-DENORM: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32(s32) = V_MAX_F32_e64 0, [[FABS]](s32), 0, [[FABS]](s32), 0, 0, implicit $exec - ; GFX9-DENORM: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]](s32) + ; GFX9-DENORM: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-DENORM: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec + ; GFX9-DENORM: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX9-FLUSH-LABEL: name: fcanonicalize_fabs_f32 - ; GFX9-FLUSH: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-FLUSH: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]] - ; GFX9-FLUSH: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32(s32) = V_MUL_F32_e64 0, 1065353216, 0, [[FABS]](s32), 0, 0, implicit $exec - ; GFX9-FLUSH: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]](s32) + ; GFX9-FLUSH: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-FLUSH: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $exec + ; GFX9-FLUSH: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -126,10 +124,9 @@ body: | bb.0: liveins: $vgpr0 ; GFX9-DENORM-LABEL: name: fcanonicalize_fneg_f32 - ; GFX9-DENORM: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-DENORM: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] - ; 
GFX9-DENORM: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32(s32) = V_MAX_F32_e64 0, [[FNEG]](s32), 0, [[FNEG]](s32), 0, 0, implicit $exec - ; GFX9-DENORM: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]](s32) + ; GFX9-DENORM: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-DENORM: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec + ; GFX9-DENORM: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] ; GFX9-FLUSH-LABEL: name: fcanonicalize_fneg_f32 ; GFX9-FLUSH: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-FLUSH: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $exec @@ -151,15 +148,13 @@ body: | liveins: $vgpr0 ; GFX9-DENORM-LABEL: name: fcanonicalize_fneg_fabs_f32 ; GFX9-DENORM: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-DENORM: [[FNEG:%[0-9]+]]:vgpr(s32) = G_FNEG [[COPY]] - ; GFX9-DENORM: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[FNEG]] - ; GFX9-DENORM: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32(s32) = V_MAX_F32_e64 0, [[FABS]](s32), 0, [[FABS]](s32), 0, 0, implicit $exec + ; GFX9-DENORM: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] + ; GFX9-DENORM: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32(s32) = V_MAX_F32_e64 2, [[FNEG]](s32), 2, [[FNEG]](s32), 0, 0, implicit $exec ; GFX9-DENORM: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]](s32) ; GFX9-FLUSH-LABEL: name: fcanonicalize_fneg_fabs_f32 ; GFX9-FLUSH: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9-FLUSH: [[FNEG:%[0-9]+]]:vgpr(s32) = G_FNEG [[COPY]] - ; GFX9-FLUSH: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[FNEG]] - ; GFX9-FLUSH: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32(s32) = V_MUL_F32_e64 0, 1065353216, 0, [[FABS]](s32), 0, 0, implicit $exec + ; GFX9-FLUSH: [[FNEG:%[0-9]+]]:vgpr_32(s32) = G_FNEG [[COPY]] + ; GFX9-FLUSH: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32(s32) = V_MUL_F32_e64 0, 1065353216, 2, [[FNEG]](s32), 0, 0, implicit $exec ; GFX9-FLUSH: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]](s32) %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir index f28caf2..254dde6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir @@ -1,37 +1,203 @@ -# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN ---- | - define amdgpu_kernel void @fmul(i32 addrspace(1)* %global0) {ret void} -... --- -name: fmul +name: fmul_f32 legalized: true regBankSelected: true -# GCN-LABEL: name: fmul body: | bb.0: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4 + ; GCN-LABEL: name: fmul_f32 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GCN: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 %2:vgpr(s32) = COPY $vgpr1 %3:vgpr(p1) = COPY $vgpr3_vgpr4 ; fmul vs - ; GCN: V_MUL_F32_e64 %4:vgpr(s32) = G_FMUL %1, %0 ; fmul sv - ; GCN: V_MUL_F32_e64 %5:vgpr(s32) = G_FMUL %0, %1 ; fmul vv 
- ; GCN: V_MUL_F32_e64 %6:vgpr(s32) = G_FMUL %1, %2 - G_STORE %4, %3 :: (store 4 into %ir.global0, addrspace 1) - G_STORE %5, %3 :: (store 4 into %ir.global0, addrspace 1) - G_STORE %6, %3 :: (store 4 into %ir.global0, addrspace 1) + G_STORE %4, %3 :: (store 4, addrspace 1) + G_STORE %5, %3 :: (store 4, addrspace 1) + G_STORE %6, %3 :: (store 4, addrspace 1) +... + +--- + +name: fmul_f64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; GCN-LABEL: name: fmul_f64 + ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 + ; GCN: [[V_MUL_F64_:%[0-9]+]]:vreg_64 = V_MUL_F64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F64_1:%[0-9]+]]:vreg_64 = V_MUL_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F64_2:%[0-9]+]]:vreg_64 = V_MUL_F64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: FLAT_STORE_DWORDX2 [[COPY3]], [[V_MUL_F64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORDX2 [[COPY3]], [[V_MUL_F64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORDX2 [[COPY3]], [[V_MUL_F64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:vgpr(s64) = COPY $vgpr0_vgpr1 + %2:vgpr(s64) = COPY $vgpr2_vgpr3 + %3:vgpr(p1) = COPY $vgpr4_vgpr5 + + ; fmul vs + %4:vgpr(s64) = G_FMUL %1, %0 + + ; fmul sv + %5:vgpr(s64) = G_FMUL %0, %1 + + ; fmul vv + %6:vgpr(s64) = G_FMUL %1, %2 + + G_STORE %4, %3 :: (store 8, addrspace 1) + G_STORE %5, %3 :: (store 8, addrspace 1) + G_STORE %6, %3 :: (store 8, addrspace 1) ... 
+ --- + +name: fmul_f16 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4 + ; GCN-LABEL: name: fmul_f16 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F16_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F16_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]], implicit [[V_MUL_F16_e64_1]], implicit [[V_MUL_F16_e64_2]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = COPY $vgpr1 + %3:vgpr(p1) = COPY $vgpr3_vgpr4 + + %4:sgpr(s16) = G_TRUNC %0 + %5:vgpr(s16) = G_TRUNC %1 + %6:vgpr(s16) = G_TRUNC %2 + + ; fmul vs + %8:vgpr(s16) = G_FMUL %4, %4 + + ; fmul sv + %9:vgpr(s16) = G_FMUL %4, %4 + + ; fmul vv + %10:vgpr(s16) = G_FMUL %4, %5 + + S_ENDPGM 0, implicit %8, implicit %9, implicit %10 +... 
+ +--- + +name: fmul_modifiers_f32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3 + ; GCN-LABEL: name: fmul_modifiers_f32 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GCN: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 2, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F32_e64_3:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 1, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F32_e64_4:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F32_e64_5:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F32_e64_6:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 3, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F32_e64_7:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F32_e64_8:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 3, [[COPY]], 3, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F32_e64_9:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 3, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec + ; GCN: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_3]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_4]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_5]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: 
FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_6]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_7]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_8]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_9]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(p1) = COPY $vgpr2_vgpr3 + + %3:vgpr(s32) = G_FABS %0 + %4:vgpr(s32) = G_FNEG %0 + %5:vgpr(s32) = G_FNEG %3 + + ; fabs lhs + %6:vgpr(s32) = G_FMUL %3, %0 + + ; fabs rhs + %7:vgpr(s32) = G_FMUL %0, %3 + + ; fabs lhs, rhs + %8:vgpr(s32) = G_FMUL %3, %3 + + + ; fneg lhs + %9:vgpr(s32) = G_FMUL %4, %0 + + ; fneg rhs + %10:vgpr(s32) = G_FMUL %0, %4 + + ; fneg lhs, rhs + %11:vgpr(s32) = G_FMUL %4, %4 + + + ; fneg fabs lhs + %12:vgpr(s32) = G_FMUL %5, %0 + + ; fneg fabs rhs + %13:vgpr(s32) = G_FMUL %0, %5 + + ; fneg fabs lhs, rhs + %14:vgpr(s32) = G_FMUL %5, %5 + + + ; fneg fabs lhs, fneg rhs + %15:vgpr(s32) = G_FMUL %5, %4 + + G_STORE %6, %2 :: (store 4, addrspace 1) + G_STORE %7, %2 :: (store 4, addrspace 1) + G_STORE %8, %2 :: (store 4, addrspace 1) + G_STORE %9, %2 :: (store 4, addrspace 1) + G_STORE %10, %2 :: (store 4, addrspace 1) + G_STORE %11, %2 :: (store 4, addrspace 1) + G_STORE %12, %2 :: (store 4, addrspace 1) + G_STORE %13, %2 :: (store 4, addrspace 1) + G_STORE %14, %2 :: (store 4, addrspace 1) + G_STORE %15, %2 :: (store 4, addrspace 1) + +... -- 2.7.4