From 573a5de7551cd33d00e67e4653d8c4e9e886b68b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 10 Oct 2022 19:42:17 -0700 Subject: [PATCH] llvm-reduce: Add opcode reduction pass Try some dumb strength reductions to "simpler" opcodes. Make some opcode substitutions I typically try to get smaller MIR out of codegen. This is a bit target specific and I have a lot of increasingly target specific modifications I try during manual reduction. --- llvm/test/tools/llvm-reduce/reduce-opcodes.ll | 220 ++++++++++++++++++++++++ llvm/tools/llvm-reduce/CMakeLists.txt | 1 + llvm/tools/llvm-reduce/DeltaManager.cpp | 2 + llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp | 112 ++++++++++++ llvm/tools/llvm-reduce/deltas/ReduceOpcodes.h | 18 ++ 5 files changed, 353 insertions(+) create mode 100644 llvm/test/tools/llvm-reduce/reduce-opcodes.ll create mode 100644 llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp create mode 100644 llvm/tools/llvm-reduce/deltas/ReduceOpcodes.h diff --git a/llvm/test/tools/llvm-reduce/reduce-opcodes.ll b/llvm/test/tools/llvm-reduce/reduce-opcodes.ll new file mode 100644 index 0000000..285283e --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-opcodes.ll @@ -0,0 +1,220 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=opcodes --test FileCheck --test-arg --check-prefixes=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck -check-prefix=RESULT %s < %t + +; CHECK-INTERESTINGNESS: @fdiv_fast( +; RESULT: %op = fmul fast float %a, %b, !dbg !7, !fpmath !13 +define float @fdiv_fast(float %a, float %b) { + %op = fdiv fast float %a, %b, !dbg !7, !fpmath !13 + ret float %op +} + +; CHECK-INTERESTINGNESS: @frem_nnan( +; RESULT: %op = fmul nnan float %a, %b, !dbg !7, !fpmath !13 +define float @frem_nnan(float %a, float %b) { + %op = frem nnan float %a, %b, !dbg !7, !fpmath !13 + ret float %op +} + +; CHECK-INTERESTINGNESS: @udiv( +; RESULT: %op = mul i32 %a, %b, !dbg !7 +define i32 @udiv(i32 %a, i32 %b) { + %op 
= udiv i32 %a, %b, !dbg !7 + ret i32 %op +} + +; CHECK-INTERESTINGNESS: @udiv_vec( +; RESULT: %op = mul <2 x i32> %a, %b, !dbg !7 +define <2 x i32> @udiv_vec(<2 x i32> %a, <2 x i32> %b) { + %op = udiv <2 x i32> %a, %b, !dbg !7 + ret <2 x i32> %op +} + +; CHECK-INTERESTINGNESS: @sdiv( +; RESULT: %op = mul i32 %a, %b{{$}} +define i32 @sdiv(i32 %a, i32 %b) { + %op = sdiv i32 %a, %b + ret i32 %op +} + +; CHECK-INTERESTINGNESS: @sdiv_exact( +; RESULT: %op = mul i32 %a, %b, !dbg !7 +define i32 @sdiv_exact(i32 %a, i32 %b) { + %op = sdiv exact i32 %a, %b, !dbg !7 + ret i32 %op +} + +; CHECK-INTERESTINGNESS: @urem( +; RESULT: %op = mul i32 %a, %b, !dbg !7 +define i32 @urem(i32 %a, i32 %b) { + %op = urem i32 %a, %b, !dbg !7 + ret i32 %op +} + +; CHECK-INTERESTINGNESS: @srem( +; RESULT: %op = mul i32 %a, %b, !dbg !7 +define i32 @srem(i32 %a, i32 %b) { + %op = srem i32 %a, %b, !dbg !7 + ret i32 %op +} + +; Make sure there's no crash if the IRBuilder decided to constant fold something +; CHECK-INTERESTINGNESS: @add_constant_fold( +; RESULT: %op = add i32 0, 0, !dbg !7 +define i32 @add_constant_fold() { + %op = add i32 0, 0, !dbg !7 + ret i32 %op +} + +; CHECK-INTERESTINGNESS: @add( +; RESULT: %op = or i32 %a, %b, !dbg !7 +define i32 @add(i32 %a, i32 %b) { + %op = add i32 %a, %b, !dbg !7 + ret i32 %op +} + +; CHECK-INTERESTINGNESS: @add_nuw( +; RESULT: %op = or i32 %a, %b, !dbg !7 +define i32 @add_nuw(i32 %a, i32 %b) { + %op = add nuw i32 %a, %b, !dbg !7 + ret i32 %op +} + +; CHECK-INTERESTINGNESS: @add_nsw( +; RESULT: %op = or i32 %a, %b, !dbg !7 +define i32 @add_nsw(i32 %a, i32 %b) { + %op = add nsw i32 %a, %b, !dbg !7 + ret i32 %op +} + +; CHECK-INTERESTINGNESS: @sub_nuw_nsw( +; RESULT: %op = or i32 %a, %b, !dbg !7 +define i32 @sub_nuw_nsw(i32 %a, i32 %b) { + %op = sub nuw nsw i32 %a, %b, !dbg !7 + ret i32 %op +} + +; CHECK-INTERESTINGNESS: @workitem_id_y( +; RESULT: %id = call i32 @llvm.amdgcn.workitem.id.x(), !dbg !7 +define i32 @workitem_id_y() { + %id = call i32 
@llvm.amdgcn.workitem.id.y(), !dbg !7 + ret i32 %id +} + +; CHECK-INTERESTINGNESS: @workitem_id_z( +; RESULT: %id = call i32 @llvm.amdgcn.workitem.id.x(), !dbg !7 +define i32 @workitem_id_z() { + %id = call i32 @llvm.amdgcn.workitem.id.z(), !dbg !7 + ret i32 %id +} + +; CHECK-INTERESTINGNESS: @workgroup_id_y( +; RESULT: %id = call i32 @llvm.amdgcn.workgroup.id.x(), !dbg !7 +define i32 @workgroup_id_y() { + %id = call i32 @llvm.amdgcn.workgroup.id.y(), !dbg !7 + ret i32 %id +} + +; CHECK-INTERESTINGNESS: @workgroup_id_z( +; RESULT: %id = call i32 @llvm.amdgcn.workgroup.id.x(), !dbg !7 +define i32 @workgroup_id_z() { + %id = call i32 @llvm.amdgcn.workgroup.id.z(), !dbg !7 + ret i32 %id +} + +; CHECK-LABEL: @minnum_nsz( +; RESULT: %op = fmul nsz float %a, %b, !dbg !7 +define float @minnum_nsz(float %a, float %b) { + %op = call nsz float @llvm.minnum.f32(float %a, float %b), !dbg !7 + ret float %op +} + +; CHECK-LABEL: @maxnum_nsz( +; RESULT: %op = fmul nsz float %a, %b, !dbg !7 +define float @maxnum_nsz(float %a, float %b) { + %op = call nsz float @llvm.maxnum.f32(float %a, float %b), !dbg !7 + ret float %op +} + +; CHECK-LABEL: @minimum( +; RESULT: %op = fmul nsz float %a, %b, !dbg !7 +define float @minimum_nsz(float %a, float %b) { + %op = call nsz float @llvm.minimum.f32(float %a, float %b), !dbg !7 + ret float %op +} + +; CHECK-LABEL: @maximum( +; RESULT: %op = fmul nsz float %a, %b, !dbg !7 +define float @maximum_nsz(float %a, float %b) { + %op = call nsz float @llvm.maximum.f32(float %a, float %b), !dbg !7 + ret float %op +} + +; CHECK-LABEL: @sqrt_ninf( +; RESULT: %op = fmul ninf float %a, 2.000000e+00, !dbg !7 +define float @sqrt_ninf(float %a, float %b) { + %op = call ninf float @llvm.sqrt.f32(float %a), !dbg !7 + ret float %op +} + +; CHECK-LABEL: @sqrt_vec( +; RESULT: %op = fmul <2 x float> %a, <float 2.000000e+00, float 2.000000e+00>, !dbg !7 +define <2 x float> @sqrt_vec(<2 x float> %a, <2 x float> %b) { + %op = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a), !dbg !7 + ret <2 x float> %op +} 
+ +; CHECK-LABEL: @div_fixup( +; RESULT: %op = call float @llvm.fma.f32(float %a, float %b, float %c) +define float @div_fixup(float %a, float %b, float %c) { + %op = call float @llvm.amdgcn.div.fixup.f32(float %a, float %b, float %c) + ret float %op +} + +; CHECK-LABEL: @fma_legacy( +; RESULT: %op = call float @llvm.fma.f32(float %a, float %b, float %c) +define float @fma_legacy(float %a, float %b, float %c) { + %op = call float @llvm.amdgcn.fma.legacy(float %a, float %b, float %c) + ret float %op +} + +; CHECK-LABEL: @fmul_legacy( +; RESULT: %op = fmul float %a, %b +define float @fmul_legacy(float %a, float %b) { + %op = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) + ret float %op +} + +declare i32 @llvm.amdgcn.workitem.id.y() +declare i32 @llvm.amdgcn.workitem.id.z() +declare i32 @llvm.amdgcn.workgroup.id.y() +declare i32 @llvm.amdgcn.workgroup.id.z() +declare float @llvm.amdgcn.div.fixup.f32(float, float, float) +declare float @llvm.amdgcn.fma.legacy(float, float, float) +declare float @llvm.amdgcn.fmul.legacy(float, float) + +declare float @llvm.sqrt.f32(float) +declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) +declare float @llvm.maxnum.f32(float, float) +declare float @llvm.minnum.f32(float, float) +declare float @llvm.maximum.f32(float, float) +declare float @llvm.minimum.f32(float, float) + +!llvm.dbg.cu = !{!0} +!opencl.ocl.version = !{!3, !3} +!llvm.module.flags = !{!4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "arst.c", directory: "/some/random/directory") +!2 = !{} +!3 = !{i32 2, i32 0} +!4 = !{i32 2, !"Dwarf Version", i32 2} +!5 = !{i32 2, !"Debug Info Version", i32 3} +!6 = !{!""} +!7 = !DILocation(line: 2, column: 6, scope: !8) +!8 = distinct !DISubprogram(name: "arst", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, 
retainedNodes: !2) +!9 = !DISubroutineType(types: !10) +!10 = !{null, !11} +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{float 2.500000e+00} diff --git a/llvm/tools/llvm-reduce/CMakeLists.txt b/llvm/tools/llvm-reduce/CMakeLists.txt index 0dafd88..6f70806 100644 --- a/llvm/tools/llvm-reduce/CMakeLists.txt +++ b/llvm/tools/llvm-reduce/CMakeLists.txt @@ -39,6 +39,7 @@ add_llvm_tool(llvm-reduce deltas/ReduceMetadata.cpp deltas/ReduceModuleData.cpp deltas/ReduceOperandBundles.cpp + deltas/ReduceOpcodes.cpp deltas/ReduceSpecialGlobals.cpp deltas/ReduceOperands.cpp deltas/ReduceOperandsSkip.cpp diff --git a/llvm/tools/llvm-reduce/DeltaManager.cpp b/llvm/tools/llvm-reduce/DeltaManager.cpp index 2bd9b28..e544d27 100644 --- a/llvm/tools/llvm-reduce/DeltaManager.cpp +++ b/llvm/tools/llvm-reduce/DeltaManager.cpp @@ -32,6 +32,7 @@ #include "deltas/ReduceInstructionsMIR.h" #include "deltas/ReduceMetadata.h" #include "deltas/ReduceModuleData.h" +#include "deltas/ReduceOpcodes.h" #include "deltas/ReduceOperandBundles.h" #include "deltas/ReduceOperands.h" #include "deltas/ReduceOperandsSkip.h" @@ -89,6 +90,7 @@ static cl::list DELTA_PASS("simplify-cfg", reduceUsingSimplifyCFGDeltaPass) \ DELTA_PASS("attributes", reduceAttributesDeltaPass) \ DELTA_PASS("module-data", reduceModuleDataDeltaPass) \ + DELTA_PASS("opcodes", reduceOpcodesDeltaPass) \ } while (false) #define DELTA_PASSES_MIR \ diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp new file mode 100644 index 0000000..a834034 --- /dev/null +++ b/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp @@ -0,0 +1,112 @@ +//===- ReduceOpcodes.cpp - Specialized Delta Pass -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Try to replace instructions that are likely to codegen to simpler or smaller +// sequences. This is a fuzzy and target specific concept. +// +//===----------------------------------------------------------------------===// + +#include "ReduceOpcodes.h" +#include "Delta.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" + +static Value *replaceIntrinsic(Module &M, IntrinsicInst *II, + Intrinsic::ID NewIID, + ArrayRef Tys = None) { + Function *NewFunc = Intrinsic::getDeclaration(&M, NewIID, Tys); + II->setCalledFunction(NewFunc); + return II; +} + +static Value *reduceInstruction(Module &M, Instruction &I) { + IRBuilder<> B(&I); + switch (I.getOpcode()) { + case Instruction::FDiv: + case Instruction::FRem: + // Divisions tends to codegen into a long sequence or a library call. + return B.CreateFMul(I.getOperand(0), I.getOperand(1)); + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + // Divisions tends to codegen into a long sequence or a library call. + return B.CreateMul(I.getOperand(0), I.getOperand(1)); + case Instruction::Add: + case Instruction::Sub: { + // Add/sub are more likely codegen to instructions with carry out side + // effects. 
+ return B.CreateOr(I.getOperand(0), I.getOperand(1)); + } + case Instruction::Call: { + IntrinsicInst *II = dyn_cast(&I); + if (!II) + return nullptr; + + switch (II->getIntrinsicID()) { + case Intrinsic::sqrt: + return B.CreateFMul(II->getArgOperand(0), + ConstantFP::get(I.getType(), 2.0)); + case Intrinsic::minnum: + case Intrinsic::maxnum: + case Intrinsic::minimum: + case Intrinsic::maximum: + case Intrinsic::amdgcn_fmul_legacy: + return B.CreateFMul(II->getArgOperand(0), II->getArgOperand(1)); + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::amdgcn_workitem_id_z: + return replaceIntrinsic(M, II, Intrinsic::amdgcn_workitem_id_x); + case Intrinsic::amdgcn_workgroup_id_y: + case Intrinsic::amdgcn_workgroup_id_z: + return replaceIntrinsic(M, II, Intrinsic::amdgcn_workgroup_id_x); + case Intrinsic::amdgcn_div_fixup: + case Intrinsic::amdgcn_fma_legacy: + return replaceIntrinsic(M, II, Intrinsic::fma, {II->getType()}); + default: + return nullptr; + } + + return nullptr; + } + default: + return nullptr; + } + + return nullptr; +} + +static void replaceOpcodesInModule(Oracle &O, Module &Mod) { + for (Function &F : Mod) { + for (BasicBlock &BB : F) + for (Instruction &I : make_early_inc_range(BB)) { + if (O.shouldKeep()) + continue; + + Instruction *Replacement = + dyn_cast_or_null(reduceInstruction(Mod, I)); + if (Replacement && Replacement != &I) { + if (auto *Op = dyn_cast(Replacement)) + Replacement->copyFastMathFlags(&I); + + Replacement->copyIRFlags(&I); + Replacement->copyMetadata(I); + Replacement->takeName(&I); + I.replaceAllUsesWith(Replacement); + I.eraseFromParent(); + } + } + } +} + +void llvm::reduceOpcodesDeltaPass(TestRunner &Test) { + outs() << "*** Reducing Opcodes...\n"; + runDeltaPass(Test, replaceOpcodesInModule); +} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.h b/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.h new file mode 100644 index 0000000..79edc7f --- /dev/null +++ b/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.h @@ -0,0 
+1,18 @@ +//===- ReduceOpcodes.h - Specialized Delta Pass -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEOPCODES_H +#define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEOPCODES_H + +#include "TestRunner.h" + +namespace llvm { +void reduceOpcodesDeltaPass(TestRunner &Test); +} // namespace llvm + +#endif -- 2.7.4