#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineOperand.h"
// Our solution here is to try to convert flag setting operations between
// a interval of identical FCMPs, so that CSE will be able to eliminate one.
bool Changed = false;
- const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+ auto &MF = *MBB.getParent();
+ auto &Subtarget = MF.getSubtarget();
+ const auto &TII = Subtarget.getInstrInfo();
+ auto TRI = Subtarget.getRegisterInfo();
+ auto RBI = Subtarget.getRegBankInfo();
+ auto &MRI = MF.getRegInfo();
// The first step is to find the first and last FCMPs. If we have found
// at least two, then set the limit of the bottom-up walk to the first FCMP
<< II);
II.setDesc(TII->get(NewOpc));
II.RemoveOperand(DeadNZCVIdx);
+ // Changing the opcode can result in differing regclass requirements,
+ // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
+ // Constrain the regclasses, possibly introducing a copy.
+ constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(),
+ II.getOperand(0), 0);
Changed |= true;
} else {
// Otherwise, we just set the nzcv imp-def operand to be dead, so the
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: pluto
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$w1' }
+ - { reg: '$x2' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+body: |
+ ; CHECK-LABEL: name: pluto
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK: liveins: $w1, $x0, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32sp = COPY $w1
+ ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY $x2
+ ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+ ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[DEF]]
+ ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 872415232
+ ; CHECK: [[COPY4:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+ ; CHECK: FCMPSrr [[COPY3]], [[COPY4]], implicit-def $nzcv
+ ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+ ; CHECK: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY1]], 1, 0
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[SUBWri]], %subreg.sub_32
+ ; CHECK: [[COPY5:%[0-9]+]]:fpr32 = COPY [[DEF]]
+ ; CHECK: FCMPSrr [[COPY5]], [[COPY4]], implicit-def $nzcv
+ ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv
+ ; CHECK: [[EORWrr:%[0-9]+]]:gpr32 = EORWrr [[CSINCWr]], [[CSINCWr1]]
+ ; CHECK: TBNZW [[EORWrr]], 0, %bb.2
+ ; CHECK: B %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 60, 59
+ ; CHECK: [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[UBFMXri]], 0, 0 :: (load 4)
+ ; CHECK: [[COPY6:%[0-9]+]]:fpr32 = COPY [[DEF]]
+ ; CHECK: [[FMULSrr:%[0-9]+]]:fpr32 = FMULSrr [[COPY6]], [[LDRSroX]]
+ ; CHECK: [[COPY7:%[0-9]+]]:fpr32 = COPY [[DEF]]
+ ; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr [[FMULSrr]], [[COPY7]]
+ ; CHECK: STRSui [[FADDSrr]], [[COPY2]], 0 :: (store 4)
+ ; CHECK: bb.2:
+ ; CHECK: RET_ReallyLR
+ bb.1:
+ liveins: $w1, $x0, $x2
+
+ %0:gpr64sp = COPY $x0
+ %1:gpr32sp = COPY $w1
+ %2:gpr64sp = COPY $x2
+ %3:gpr32 = IMPLICIT_DEF
+ %29:fpr32 = COPY %3
+ %33:gpr32 = MOVi32imm 872415232
+ %4:fpr32 = COPY %33
+ FCMPSrr %29, %4, implicit-def $nzcv
+ %28:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+ %7:gpr32 = SUBSWri %1, 1, 0, implicit-def $nzcv
+ %8:gpr64 = SUBREG_TO_REG 0, %7, %subreg.sub_32
+ %30:fpr32 = COPY %3
+ FCMPSrr %30, %4, implicit-def $nzcv
+ %27:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv
+ %26:gpr32 = EORWrr %28, %27
+ TBNZW %26, 0, %bb.3
+ B %bb.2
+
+ bb.2:
+ %12:gpr64 = UBFMXri %8, 60, 59
+ %15:fpr32 = LDRSroX %0, %12, 0, 0 :: (load 4)
+ %31:fpr32 = COPY %3
+ %16:fpr32 = FMULSrr %31, %15
+ %32:fpr32 = COPY %3
+ %17:fpr32 = FADDSrr %16, %32
+ STRSui %17, %2, 0 :: (store 4)
+
+ bb.3:
+ RET_ReallyLR
+
+...