return;
const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
- if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
- MachineRegisterInfo::use_iterator NextUse;
- SmallVector<FoldCandidate, 4> CopyUses;
- for (MachineRegisterInfo::use_iterator Use = MRI->use_begin(DestReg),
- E = MRI->use_end();
- Use != E; Use = NextUse) {
- NextUse = std::next(Use);
- // There's no point trying to fold into an implicit operand.
- if (Use->isImplicit())
- continue;
-
- FoldCandidate FC = FoldCandidate(Use->getParent(), Use.getOperandNo(),
- &UseMI->getOperand(1));
- CopyUses.push_back(FC);
- }
- for (auto &F : CopyUses) {
- foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, CopiesToReplace);
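+ // If the copy's destination is a physical register, its later uses may read
+ // a different definition (the physreg can be clobbered between the copy and
+ // the use), so only fold through copies to virtual registers.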
+ if (!DestReg.isPhysical()) {
+ if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
+ MachineRegisterInfo::use_iterator NextUse;
+ SmallVector<FoldCandidate, 4> CopyUses;
+ for (MachineRegisterInfo::use_iterator Use = MRI->use_begin(DestReg),
+ E = MRI->use_end();
+ Use != E; Use = NextUse) {
+ NextUse = std::next(Use);
+ // There's no point trying to fold into an implicit operand.
+ if (Use->isImplicit())
+ continue;
+
+ FoldCandidate FC = FoldCandidate(Use->getParent(), Use.getOperandNo(),
+ &UseMI->getOperand(1));
+ CopyUses.push_back(FC);
+ }
+ for (auto &F : CopyUses) {
+ foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, CopiesToReplace);
+ }
}
- }
- if (DestRC == &AMDGPU::AGPR_32RegClass &&
- TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
- UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
- UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
- CopiesToReplace.push_back(UseMI);
- return;
+ if (DestRC == &AMDGPU::AGPR_32RegClass &&
+ TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
+ UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
+ UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
+ CopiesToReplace.push_back(UseMI);
+ return;
+ }
}
// In order to fold immediates into copies, we need to change the
S_ENDPGM 0, implicit $vgpr0
...
+
+# The users of $vgpr1 should not be visited for further immediate
+# folding.
+
+# GCN-LABEL: name: no_fold_physreg_users_vgpr{{$}}
+# GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: S_NOP 0, implicit-def $vgpr1
+# GCN-NEXT: %2:vgpr_32 = COPY $vgpr1
+# GCN-NEXT: $vgpr2 = COPY %2
+---
+name: no_fold_physreg_users_vgpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0:sreg_32 = S_MOV_B32 0
+ %1:vgpr_32 = COPY %0
+ $vgpr1 = COPY %0
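+    ; The S_NOP below clobbers $vgpr1, so the 0 folded into the copy above
+    ; must not be propagated to the later uses of $vgpr1.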
+ S_NOP 0, implicit-def $vgpr1
+ %2:vgpr_32 = COPY $vgpr1
+ $vgpr2 = COPY %2
+ S_ENDPGM 0
+
+...
--- /dev/null
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; Make sure the return value of the first call is not overwritten with
+; a constant before the fadd use.
+
+; CHECK-LABEL: vgpr_multi_use_imm_fold:
+; CHECK: v_mov_b32_e32 v0, 0{{$}}
+; CHECK: v_mov_b32_e32 v1, 2.0{{$}}
+; CHECK: s_swappc_b64
+; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], 0
+; CHECK: s_swappc_b64
+define amdgpu_kernel void @vgpr_multi_use_imm_fold() {
+entry:
+ store double 0.0, double addrspace(1)* undef, align 8
+ %call0 = tail call fastcc double @__ocml_log_f64(double 2.0)
+ %op = fadd double %call0, 0.0
+ %call1 = tail call fastcc double @__ocml_sqrt_f64(double %op)
+ ret void
+}
+
+declare hidden fastcc double @__ocml_log_f64(double)
+declare hidden fastcc double @__ocml_sqrt_f64(double)