hoisted constants.
The constant hoisting pass tries to hoist large constants into predecessors and also
generates remat instructions in terms of the hoisted constants. These aim to prevent
codegen from rematerializing expensive constants multiple times. So that we can
re-use this optimization, we preserve the no-op bitcasts that are used to anchor
constants to the predecessor blocks.
SelectionDAG achieves this by having the OpaqueConstant node, which is just a
normal constant with an opaque flag set. I've opted to avoid introducing a new
constant generic instruction here. Instead, we have a new G_CONSTANT_FOLD_BARRIER
operation that constitutes a folding barrier.
These are somewhat like the optimization hints (e.g. G_ASSERT_ZEXT), in that
they're eliminated by the generic instruction selection code.
This change by itself has very minor improvements in -Os CTMark overall. What this
does allow is better optimizations when future combines are added that rely on having
expensive constants remain unfolded.
Differential Revision: https://reviews.llvm.org/D144336
- Look through the source register
- Replace the destination register with the source register
+
+
+Miscellaneous
+-------------
+
+G_CONSTANT_FOLD_BARRIER
+^^^^^^^^^^^^^^^^^^^^^^^
+
+This operation is used as an opaque barrier to prevent constant folding. Combines
+and other transformations should not look through this. These have no other
+semantics and can be safely eliminated if a target chooses.
/// Generic freeze.
HANDLE_TARGET_OPCODE(G_FREEZE)
+/// Constant folding barrier.
+HANDLE_TARGET_OPCODE(G_CONSTANT_FOLD_BARRIER)
+
// INTRINSIC fptrunc_round intrinsic.
HANDLE_TARGET_OPCODE(G_INTRINSIC_FPTRUNC_ROUND)
let InOperandList = (ins type0:$src, untyped_imm_0:$align);
let hasSideEffects = false;
}
+
+// Prevent constant folding of the source value with any users.
+def G_CONSTANT_FOLD_BARRIER : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = false;
+}
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
MachineIRBuilder &MIRBuilder) {
// If we're bitcasting to the source type, we can reuse the source vreg.
if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
- getLLTForType(*U.getType(), *DL))
+ getLLTForType(*U.getType(), *DL)) {
+ // If the source is a ConstantInt then it was probably created by
+ // ConstantHoisting and we should leave it alone.
+ if (isa<ConstantInt>(U.getOperand(0)))
+ return translateCast(TargetOpcode::G_CONSTANT_FOLD_BARRIER, U,
+ MIRBuilder);
return translateCopy(U, *U.getOperand(0), MIRBuilder);
+ }
return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
}
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/config.h"
continue;
}
- // Eliminate hints.
- if (isPreISelGenericOptimizationHint(MI.getOpcode())) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+ // Eliminate hints or G_CONSTANT_FOLD_BARRIER.
+ if (isPreISelGenericOptimizationHint(MI.getOpcode()) ||
+ MI.getOpcode() == TargetOpcode::G_CONSTANT_FOLD_BARRIER) {
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
- // At this point, the destination register class of the hint may have
+ // At this point, the destination register class of the op may have
// been decided.
//
// Propagate that through to the source register.
const bool HasCSSC = ST.hasCSSC();
const bool HasRCPC3 = ST.hasRCPC3();
- getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
+ getActionDefinitionsBuilder(
+ {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
.legalFor({p0, s8, s16, s32, s64})
.legalFor(PackedVectorAllTypeList)
.widenScalarToNextPow2(0)
; At this point we mapped 46 values. The 'i32 100' constant will grow the map.
; CHECK: %46:_(s32) = G_CONSTANT i32 100
-; CHECK: $w0 = COPY %46(s32)
+; CHECK: $w0 = COPY %47(s32)
%res = bitcast i32 100 to i32
ret i32 %res
}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-apple-ios -global-isel -stop-after=irtranslator %s -o - | FileCheck %s --check-prefix=TRANSLATED
+; RUN: llc -mtriple=aarch64-apple-ios -global-isel -stop-before=instruction-select %s -o - | FileCheck %s --check-prefix=PRESELECTION
+; RUN: llc -mtriple=aarch64-apple-ios -global-isel -stop-after=instruction-select %s -o - | FileCheck %s --check-prefix=POSTSELECTION
+
+; Check we generate G_CONSTANT_FOLD_BARRIER for constants and don't fold them, since they're
+; used by constant hoisting to prevent constant folding/propagation.
+
+declare void @callee()
+
+define i32 @test(i32 %a, i1 %c) {
+ ; TRANSLATED-LABEL: name: test
+ ; TRANSLATED: bb.1.entry:
+ ; TRANSLATED-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; TRANSLATED-NEXT: liveins: $w0, $w1
+ ; TRANSLATED-NEXT: {{ $}}
+ ; TRANSLATED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; TRANSLATED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; TRANSLATED-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+ ; TRANSLATED-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+ ; TRANSLATED-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s8)
+ ; TRANSLATED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 100000
+ ; TRANSLATED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; TRANSLATED-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s32) = G_CONSTANT_FOLD_BARRIER [[C]]
+ ; TRANSLATED-NEXT: G_BRCOND [[TRUNC1]](s1), %bb.3
+ ; TRANSLATED-NEXT: G_BR %bb.2
+ ; TRANSLATED-NEXT: {{ $}}
+ ; TRANSLATED-NEXT: bb.2.common.ret:
+ ; TRANSLATED-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.3, [[C1]](s32), %bb.1
+ ; TRANSLATED-NEXT: $w0 = COPY [[PHI]](s32)
+ ; TRANSLATED-NEXT: RET_ReallyLR implicit $w0
+ ; TRANSLATED-NEXT: {{ $}}
+ ; TRANSLATED-NEXT: bb.3.cont:
+ ; TRANSLATED-NEXT: successors: %bb.2(0x80000000)
+ ; TRANSLATED-NEXT: {{ $}}
+ ; TRANSLATED-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[CONSTANT_FOLD_BARRIER]]
+ ; TRANSLATED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; TRANSLATED-NEXT: BL @callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
+ ; TRANSLATED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; TRANSLATED-NEXT: G_BR %bb.2
+ ; PRESELECTION-LABEL: name: test
+ ; PRESELECTION: bb.1.entry:
+ ; PRESELECTION-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; PRESELECTION-NEXT: liveins: $w0, $w1
+ ; PRESELECTION-NEXT: {{ $}}
+ ; PRESELECTION-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; PRESELECTION-NEXT: [[COPY1:%[0-9]+]]:gpr(s32) = COPY $w1
+ ; PRESELECTION-NEXT: [[TRUNC:%[0-9]+]]:gpr(s8) = G_TRUNC [[COPY1]](s32)
+ ; PRESELECTION-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:gpr(s8) = G_ASSERT_ZEXT [[TRUNC]], 1
+ ; PRESELECTION-NEXT: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
+ ; PRESELECTION-NEXT: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 100000
+ ; PRESELECTION-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:gpr(s32) = G_CONSTANT_FOLD_BARRIER [[C1]]
+ ; PRESELECTION-NEXT: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
+ ; PRESELECTION-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT [[ASSERT_ZEXT]](s8)
+ ; PRESELECTION-NEXT: [[AND:%[0-9]+]]:gpr(s32) = G_AND [[ANYEXT]], [[C2]]
+ ; PRESELECTION-NEXT: G_BRCOND [[AND]](s32), %bb.3
+ ; PRESELECTION-NEXT: G_BR %bb.2
+ ; PRESELECTION-NEXT: {{ $}}
+ ; PRESELECTION-NEXT: bb.2.common.ret:
+ ; PRESELECTION-NEXT: [[PHI:%[0-9]+]]:gpr(s32) = G_PHI %7(s32), %bb.3, [[C]](s32), %bb.1
+ ; PRESELECTION-NEXT: $w0 = COPY [[PHI]](s32)
+ ; PRESELECTION-NEXT: RET_ReallyLR implicit $w0
+ ; PRESELECTION-NEXT: {{ $}}
+ ; PRESELECTION-NEXT: bb.3.cont:
+ ; PRESELECTION-NEXT: successors: %bb.2(0x80000000)
+ ; PRESELECTION-NEXT: {{ $}}
+ ; PRESELECTION-NEXT: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[COPY]], [[CONSTANT_FOLD_BARRIER]]
+ ; PRESELECTION-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; PRESELECTION-NEXT: BL @callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
+ ; PRESELECTION-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; PRESELECTION-NEXT: G_BR %bb.2
+ ; POSTSELECTION-LABEL: name: test
+ ; POSTSELECTION: bb.1.entry:
+ ; POSTSELECTION-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; POSTSELECTION-NEXT: liveins: $w0, $w1
+ ; POSTSELECTION-NEXT: {{ $}}
+ ; POSTSELECTION-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; POSTSELECTION-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; POSTSELECTION-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr
+ ; POSTSELECTION-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 100000
+ ; POSTSELECTION-NEXT: TBNZW [[COPY1]], 0, %bb.3
+ ; POSTSELECTION-NEXT: B %bb.2
+ ; POSTSELECTION-NEXT: {{ $}}
+ ; POSTSELECTION-NEXT: bb.2.common.ret:
+ ; POSTSELECTION-NEXT: [[PHI:%[0-9]+]]:gpr32 = PHI %7, %bb.3, [[COPY2]], %bb.1
+ ; POSTSELECTION-NEXT: $w0 = COPY [[PHI]]
+ ; POSTSELECTION-NEXT: RET_ReallyLR implicit $w0
+ ; POSTSELECTION-NEXT: {{ $}}
+ ; POSTSELECTION-NEXT: bb.3.cont:
+ ; POSTSELECTION-NEXT: successors: %bb.2(0x80000000)
+ ; POSTSELECTION-NEXT: {{ $}}
+ ; POSTSELECTION-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[MOVi32imm]]
+ ; POSTSELECTION-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; POSTSELECTION-NEXT: BL @callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
+ ; POSTSELECTION-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ ; POSTSELECTION-NEXT: B %bb.2
+entry:
+ %hc = bitcast i32 100000 to i32
+ br i1 %c, label %cont, label %end
+cont:
+ %add = add i32 %a, %hc
+ call void @callee()
+ ret i32 %add
+end:
+ ret i32 0
+}
+
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+
+# DEBUG-NEXT: G_CONSTANT_FOLD_BARRIER (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+
#
# DEBUG-NEXT: G_INTRINSIC_FPTRUNC_ROUND (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
; CHECK: bb.1.entry:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2228259
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2
; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2228259
+ ; CHECK-NEXT: [[OPAQUE:%[0-9]+]]:_(s32) = G_CONSTANT_FOLD_BARRIER [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2
- ; CHECK-NEXT: G_STORE [[C]](s32), [[GV3]](p0) :: (store (s32) into @var2)
+ ; CHECK-NEXT: G_STORE [[OPAQUE]](s32), [[GV3]](p0) :: (store (s32) into @var2)
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.then2:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1
- ; CHECK-NEXT: G_STORE [[C]](s32), [[GV4]](p0) :: (store (s32) into @var1)
+ ; CHECK-NEXT: G_STORE [[OPAQUE]](s32), [[GV4]](p0) :: (store (s32) into @var1)
; CHECK-NEXT: G_BR %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
- ; CHECK-NEXT: G_STORE [[C]](s32), [[GV5]](p0) :: (store (s32) into @var3)
+ ; CHECK-NEXT: G_STORE [[OPAQUE]](s32), [[GV5]](p0) :: (store (s32) into @var3)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: $w0 = COPY [[C3]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
; CHECK: bb.1.entry:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -2228259
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64
; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -2228259
+ ; CHECK-NEXT: [[OPAQUE:%[0-9]+]]:_(s64) = G_CONSTANT_FOLD_BARRIER [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64
- ; CHECK-NEXT: G_STORE [[C]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
+ ; CHECK-NEXT: G_STORE [[OPAQUE]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.then2:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
- ; CHECK-NEXT: G_STORE [[C]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
+ ; CHECK-NEXT: G_STORE [[OPAQUE]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
; CHECK-NEXT: G_BR %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
- ; CHECK-NEXT: G_STORE [[C]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
+ ; CHECK-NEXT: G_STORE [[OPAQUE]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: $x0 = COPY [[C3]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0