static constexpr unsigned ModifierOpNames[] = {
AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
- AMDGPU::OpName::omod};
+ AMDGPU::OpName::omod, AMDGPU::OpName::op_sel}; // op_sel is the entry this change appends to the operand-name list.
void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
- for (unsigned Name : reverse(ModifierOpNames))
- MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, Name));
+ for (unsigned Name : reverse(ModifierOpNames)) { // Visit the listed operand names last-to-first; presumably so earlier removals do not shift indices still to be looked up -- confirm.
+ int Idx = AMDGPU::getNamedOperandIdx(Opc, Name); // Look up this name's operand index for MI's opcode.
+ if (Idx >= 0) // Guard added by this change: only a non-negative index is removed (a negative result presumably means the opcode has no such operand -- confirm).
+ MI.removeOperand(Idx);
+ }
}
bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
NewOpcode = AMDGPU::V_MADAK_F16;
break;
case AMDGPU::V_FMA_F16_e64:
+ case AMDGPU::V_FMA_F16_gfx9_e64:
NewOpcode = AMDGPU::V_FMAAK_F16;
break;
}
NewOpcode = AMDGPU::V_MADMK_F16;
break;
case AMDGPU::V_FMA_F16_e64:
+ case AMDGPU::V_FMA_F16_gfx9_e64:
NewOpcode = AMDGPU::V_FMAMK_F16;
break;
}
if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
- MI.getOpcode() == AMDGPU::V_FMA_F16_e64) {
+ MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
+ MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
shrinkMadFma(MI);
continue;
}
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,GFX9-SDAG
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX9,GFX9-GISEL
-; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG
-; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10
declare half @llvm.fma.f16(half, half, half)
declare half @llvm.maxnum.f16(half, half)
; GFX9-GISEL-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: test_fmaak:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-SDAG-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: test_fmaak:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-GISEL-NEXT: v_fma_f16 v0, v0, v1, 0x4200
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: test_fmaak:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200
+; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call half @llvm.fma.f16(half %x, half %y, half 0xH4200)
ret half %r
}
; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+ %2:vgpr_32 = V_FMA_F16_gfx9_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit %2
...
; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
+ %2:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit %2
...
; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
+ %2:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit %2
...
; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
+ %2:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
SI_RETURN implicit %2
...