def FSTGT_S : FP_STORE_3R<0b00111000011101100, "fstgt.s", FPR32>;
def FSTLE_S : FP_STORE_3R<0b00111000011101110, "fstle.s", FPR32>;
+// Pseudo instructions for spill/reload CFRs.
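+// These are expanded by eliminateFrameIndex into a MOVCF2GR/MOVGR2CF paired
+// with an ordinary GPR store/load, since CFRs cannot be accessed in memory
+// directly.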
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+def PseudoST_CFR : Pseudo<(outs),
+                          (ins CFR:$ccd, GPR:$rj, grlenimm:$imm)>;
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def PseudoLD_CFR : Pseudo<(outs CFR:$ccd),
+                          (ins GPR:$rj, grlenimm:$imm)>;
} // Predicates = [HasBasicF]
//===----------------------------------------------------------------------===//
class PatFPSetcc<CondCode cc, LAInst CmpInst, RegisterClass RegTy>
    : Pat<(any_fsetcc RegTy:$fj, RegTy:$fk, cc),
-          (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>;
+          (CmpInst RegTy:$fj, RegTy:$fk)>;
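+// The compare instruction now produces its result directly in a CFR; a
+// MOVCF2GR is emitted separately (as a cross-class copy) only when the value
+// is actually needed in a GPR.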
// SETOGT/SETOGE/SETUGT/SETUGE/SETGE/SETNE/SETGT will expand into
// SETOLT/SETOLE/SETULT/SETULE/SETLE/SETEQ/SETLT.
def : PatFPSetcc<SETOEQ, FCMP_CEQ_S, FPR32>;
class PatStrictFsetccs<CondCode cc, LAInst CmpInst, RegisterClass RegTy>
    : Pat<(strict_fsetccs RegTy:$fj, RegTy:$fk, cc),
-          (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>;
+          (CmpInst RegTy:$fj, RegTy:$fk)>;
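+// strict_fsetccs selects the signaling FCMP_S* compares, while the ordinary
+// setcc patterns above use the quiet FCMP_C* variants.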
def : PatStrictFsetccs<SETOEQ, FCMP_SEQ_S, FPR32>;
def : PatStrictFsetccs<SETOLT, FCMP_SLT_S, FPR32>;
def : PatStrictFsetccs<SETOLE, FCMP_SLE_S, FPR32>;
/// Select
-def : Pat<(select GPR:$cc, FPR32:$fk, FPR32:$fj),
-          (FSEL_S FPR32:$fj, FPR32:$fk, (MOVGR2CF GPR:$cc))>;
+def : Pat<(select CFR:$cc, FPR32:$fk, FPR32:$fj),
+          (FSEL_S FPR32:$fj, FPR32:$fk, CFR:$cc)>;
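+// FSEL now takes the condition directly as a CFR operand instead of going
+// through MOVGR2CF from a GPR.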
/// Selectcc
/// Select
-def : Pat<(select GPR:$cc, FPR64:$fk, FPR64:$fj),
-          (FSEL_D FPR64:$fj, FPR64:$fk, (MOVGR2CF GPR:$cc))>;
+def : Pat<(select CFR:$cc, FPR64:$fk, FPR64:$fj),
+          (FSEL_D FPR64:$fj, FPR64:$fk, CFR:$cc)>;
/// Selectcc
#include "LoongArchInstrInfo.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
+#include "LoongArchRegisterInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "MCTargetDesc/LoongArchMatInt.h"
#include "llvm/CodeGen/RegisterScavenging.h"
return;
}
+  // GPR->CFR copy.
+  if (LoongArch::CFRRegClass.contains(DstReg) &&
+      LoongArch::GPRRegClass.contains(SrcReg)) {
+    BuildMI(MBB, MBBI, DL, get(LoongArch::MOVGR2CF), DstReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+  // CFR->GPR copy.
+  if (LoongArch::GPRRegClass.contains(DstReg) &&
+      LoongArch::CFRRegClass.contains(SrcReg)) {
+    BuildMI(MBB, MBBI, DL, get(LoongArch::MOVCF2GR), DstReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
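+  // Note that no CFR->CFR copy is handled here; getReservedRegs() reserves
+  // $fcc1-$fcc7 so that a copy between two CFRs never arises.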
+
// FPR->FPR copies.
unsigned Opc;
if (LoongArch::FPR32RegClass.contains(DstReg, SrcReg)) {
    Opcode = LoongArch::FST_S;
  else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
    Opcode = LoongArch::FST_D;
+  else if (LoongArch::CFRRegClass.hasSubClassEq(RC))
+    Opcode = LoongArch::PseudoST_CFR;
  else
    llvm_unreachable("Can't store this register to stack slot");
    Opcode = LoongArch::FLD_S;
  else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
    Opcode = LoongArch::FLD_D;
+  else if (LoongArch::CFRRegClass.hasSubClassEq(RC))
+    Opcode = LoongArch::PseudoLD_CFR;
  else
    llvm_unreachable("Can't load this register from stack slot");
if (TFI->hasBP(MF))
markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp
+  // FIXME: To avoid generating COPY instructions between CFRs, only use $fcc0.
+  // This is required to work around the fact that a COPY instruction between
+  // CFRs is not provided in LoongArch.
+  if (MF.getSubtarget<LoongArchSubtarget>().hasBasicF())
+    for (size_t Reg = LoongArch::FCC1; Reg <= LoongArch::FCC7; ++Reg)
+      markSuperRegs(Reserved, Reg);
+
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
const LoongArchInstrInfo *TII = STI.getInstrInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
DebugLoc DL = MI.getDebugLoc();
+  bool IsLA64 = STI.is64Bit();
+  unsigned MIOpc = MI.getOpcode();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
Register FrameReg;
bool FrameRegIsKill = false;
if (!isInt<12>(Offset.getFixed())) {
-    unsigned Addi = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
-    unsigned Add = STI.is64Bit() ? LoongArch::ADD_D : LoongArch::ADD_W;
+    unsigned Addi = IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+    unsigned Add = IsLA64 ? LoongArch::ADD_D : LoongArch::ADD_W;
// The offset won't fit in an immediate, so use a scratch register instead.
// Modify Offset and FrameReg appropriately.
Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
TII->movImm(MBB, II, DL, ScratchReg, Offset.getFixed());
-    if (MI.getOpcode() == Addi) {
+    if (MIOpc == Addi) {
BuildMI(MBB, II, DL, TII->get(Add), MI.getOperand(0).getReg())
.addReg(FrameReg)
.addReg(ScratchReg, RegState::Kill);
FrameRegIsKill = true;
}
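+
+  // CFRs cannot be loaded or stored directly; the spill/reload pseudos are
+  // expanded here by bouncing the value through a scratch GPR.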
+  // Spill CFRs.
+  if (MIOpc == LoongArch::PseudoST_CFR) {
+    Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
+    BuildMI(MBB, II, DL, TII->get(LoongArch::MOVCF2GR), ScratchReg)
+        .add(MI.getOperand(0));
+    BuildMI(MBB, II, DL, TII->get(IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
+        .addReg(ScratchReg, RegState::Kill)
+        .addReg(FrameReg)
+        .addImm(Offset.getFixed());
+    MI.eraseFromParent();
+    return;
+  }
+
+  // Reload CFRs.
+  if (MIOpc == LoongArch::PseudoLD_CFR) {
+    Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
+    BuildMI(MBB, II, DL, TII->get(IsLA64 ? LoongArch::LD_D : LoongArch::LD_W),
+            ScratchReg)
+        .addReg(FrameReg)
+        .addImm(Offset.getFixed());
+    BuildMI(MBB, II, DL, TII->get(LoongArch::MOVGR2CF))
+        .add(MI.getOperand(0))
+        .addReg(ScratchReg, RegState::Kill);
+    MI.eraseFromParent();
+    return;
+  }
+
MI.getOperand(FIOperandNum)
.ChangeToRegister(FrameReg, false, false, FrameRegIsKill);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
; LA64-NEXT: # %bb.1: # %if.then
; LA64-NEXT: ret
; LA64-NEXT: .LBB17_2: # %if.else
-; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1
; LA64-NEXT: movcf2gr $a0, $fcc0
; LA64-NEXT: ret
%cmp = fcmp fast oeq double %a, 0.000000e+00
; LA32-NEXT: # %bb.1: # %if.then
; LA32-NEXT: ret
; LA32-NEXT: .LBB17_2: # %if.else
-; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1
; LA32-NEXT: movcf2gr $a0, $fcc0
; LA32-NEXT: ret
;
; LA64-NEXT: # %bb.1: # %if.then
; LA64-NEXT: ret
; LA64-NEXT: .LBB17_2: # %if.else
-; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1
; LA64-NEXT: movcf2gr $a0, $fcc0
; LA64-NEXT: ret
%cmp = fcmp fast oeq float %a, 0.000000e+00
; LA32F-NEXT: ffint.s.w $fa0, $fa0
; LA32F-NEXT: fadd.s $fa0, $fa0, $fa0
; LA32F-NEXT: slti $a1, $a0, 0
-; LA32F-NEXT: movgr2cf $fcc0, $a1
; LA32F-NEXT: movgr2fr.w $fa1, $a0
; LA32F-NEXT: ffint.s.w $fa1, $fa1
+; LA32F-NEXT: movgr2cf $fcc0, $a1
; LA32F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0
; LA32F-NEXT: ret
;
; LA64D-NEXT: ffint.s.l $fa0, $fa0
; LA64D-NEXT: fadd.s $fa0, $fa0, $fa0
; LA64D-NEXT: slti $a1, $a0, 0
-; LA64D-NEXT: movgr2cf $fcc0, $a1
; LA64D-NEXT: movgr2fr.d $fa1, $a0
; LA64D-NEXT: ffint.s.l $fa1, $fa1
+; LA64D-NEXT: movgr2cf $fcc0, $a1
; LA64D-NEXT: fsel $fa0, $fa1, $fa0, $fcc0
; LA64D-NEXT: ret
%1 = uitofp i64 %a to float
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
+
+;; Check that the $fcc* register is spilled before the function call and then
+;; reloaded.
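+;; The spill is expected to go through a scratch GPR (movcf2gr + st.w/st.d)
+;; and the reload through ld.w/ld.d + movgr2cf.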
+declare void @foo()
+
+define i1 @load_store_fcc_reg(float %a, i1 %c) {
+; LA32-LABEL: load_store_fcc_reg:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: .cfi_def_cfa_offset 32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: .cfi_offset 22, -8
+; LA32-NEXT: .cfi_offset 56, -16
+; LA32-NEXT: .cfi_offset 57, -24
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: movgr2fr.w $fs1, $zero
+; LA32-NEXT: fcmp.cult.s $fcc0, $fs1, $fa0
+; LA32-NEXT: movcf2gr $a0, $fcc0
+; LA32-NEXT: st.w $a0, $sp, 4
+; LA32-NEXT: bl %plt(foo)
+; LA32-NEXT: ld.w $a0, $sp, 4
+; LA32-NEXT: movgr2cf $fcc0, $a0
+; LA32-NEXT: bcnez $fcc0, .LBB0_2
+; LA32-NEXT: # %bb.1: # %if.then
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: b .LBB0_3
+; LA32-NEXT: .LBB0_2: # %if.else
+; LA32-NEXT: fcmp.cle.s $fcc0, $fs0, $fs1
+; LA32-NEXT: movcf2gr $a0, $fcc0
+; LA32-NEXT: .LBB0_3: # %if.then
+; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_store_fcc_reg:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -48
+; LA64-NEXT: .cfi_def_cfa_offset 48
+; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs1, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: .cfi_offset 22, -16
+; LA64-NEXT: .cfi_offset 56, -24
+; LA64-NEXT: .cfi_offset 57, -32
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: movgr2fr.w $fs1, $zero
+; LA64-NEXT: fcmp.cult.s $fcc0, $fs1, $fa0
+; LA64-NEXT: movcf2gr $a0, $fcc0
+; LA64-NEXT: st.d $a0, $sp, 8
+; LA64-NEXT: bl %plt(foo)
+; LA64-NEXT: ld.d $a0, $sp, 8
+; LA64-NEXT: movgr2cf $fcc0, $a0
+; LA64-NEXT: bcnez $fcc0, .LBB0_2
+; LA64-NEXT: # %bb.1: # %if.then
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: b .LBB0_3
+; LA64-NEXT: .LBB0_2: # %if.else
+; LA64-NEXT: fcmp.cle.s $fcc0, $fs0, $fs1
+; LA64-NEXT: movcf2gr $a0, $fcc0
+; LA64-NEXT: .LBB0_3: # %if.then
+; LA64-NEXT: fld.d $fs1, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 48
+; LA64-NEXT: ret
+ %cmp = fcmp ole float %a, 0.000000e+00
+ call void @foo()
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ ret i1 %c
+
+if.else:
+ ret i1 %cmp
+}