bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
+ bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
+ bool AllowAbs = true) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
+ bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
unsigned Opc
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
+ // omod
+ SDValue Ops[8];
+ SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
+ SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
+ SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
}
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
- unsigned &Mods) const {
+ unsigned &Mods,
+ bool AllowAbs) const {
Mods = 0;
Src = In;
Src = Src.getOperand(0);
}
- if (Src.getOpcode() == ISD::FABS) {
+ if (AllowAbs && Src.getOpcode() == ISD::FABS) {
Mods |= SISrcMods::ABS;
Src = Src.getOperand(0);
}
return false;
}
+// Select a source operand plus its modifier mask for a VOP3B instruction.
+// Same as SelectVOP3Mods except FABS folding is disabled: abs is not
+// allowed on VOP3B instructions (neg may still be folded).
+bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
+                                         SDValue &SrcMods) const {
+  unsigned Mods;
+  if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
+    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+    return true;
+  }
+
+  return false;
+}
+
// Select source + modifiers like SelectVOP3Mods.
// NOTE(review): the original body called SelectVOP3Mods twice with identical
// arguments; the first call's Src/SrcMods results were immediately
// overwritten by the second, so the redundant call has been removed.
// TODO confirm against upstream that an intended no-NaN check on Src was not
// lost when this hunk was produced — the _NNaN suffix suggests one.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  return SelectVOP3Mods(In, Src, SrcMods);
}
+// VOP3B variant of SelectVOP3Mods0: selects src/src_mods with abs folding
+// disabled, and supplies default (zero) clamp and omod operands so the
+// instruction carries the full VOP3 operand list.
+bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
+                                          SDValue &SrcMods, SDValue &Clamp,
+                                          SDValue &Omod) const {
+  SDLoc DL(In);
+  // Clamp and omod are always selected as 0 here; the instruction printer /
+  // assembler can still encode non-default values written by hand.
+  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
+
+  return SelectVOP3BMods(In, Src, SrcMods);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
SDValue &Clamp, SDValue &Omod) const {
Src = In;
else
return false;
+ // TODO: Match source modifiers.
+
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), Dst0)
.addDef(Dst1)
- .addUse(Src0)
- .addUse(Denom)
- .addUse(Numer);
+ .addImm(0) // $src0_modifiers
+ .addUse(Src0) // $src0
+ .addImm(0) // $src1_modifiers
+ .addUse(Denom) // $src1
+ .addImm(0) // $src2_modifiers
+ .addUse(Numer) // $src2
+ .addImm(0) // $clamp
+ .addImm(0); // $omod
MI.eraseFromParent();
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
std::pair<Register, unsigned>
-AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root) const {
+AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
+ bool AllowAbs) const {
Register Src = Root.getReg();
Register OrigSrc = Src;
unsigned Mods = 0;
MI = getDefIgnoringCopies(Src, *MRI);
}
- if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
+ if (AllowAbs && MI && MI->getOpcode() == AMDGPU::G_FABS) {
Src = MI->getOperand(1).getReg();
Mods |= SISrcMods::ABS;
}
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
+  // VOP3B variant of selectVOP3Mods0: fold source modifiers but disallow
+  // abs (abs is not permitted on VOP3B instructions), and render default
+  // (zero) clamp/omod operands.
+  Register Src;
+  unsigned Mods;
+  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);
+
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
+  }};
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
return {{
[=](MachineInstrBuilder &MIB) { MIB.add(Root); },
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
+  // VOP3B variant of selectVOP3Mods: folds neg source modifiers but not
+  // abs, which is not allowed on VOP3B instructions.
+  Register Src;
+  unsigned Mods;
+  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);
+
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+  }};
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
Register Reg = Root.getReg();
const MachineInstr *Def = getDefIgnoringCopies(Reg, *MRI);
bool selectGlobalAtomicFaddIntrinsic(MachineInstr &I) const;
bool selectBVHIntrinsic(MachineInstr &I) const;
- std::pair<Register, unsigned>
- selectVOP3ModsImpl(MachineOperand &Root) const;
+ std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
+ bool AllowAbs = true) const;
InstructionSelector::ComplexRendererFns
selectVCSRC(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3Mods0(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
+ selectVOP3BMods0(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
selectVOP3OMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3Mods(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectVOP3BMods(MachineOperand &Root) const;
ComplexRendererFns selectVOP3NoMods(MachineOperand &Root) const;
bool validateVccOperand(unsigned Reg) const;
bool validateVOP3Literal(const MCInst &Inst) const;
bool validateMAIAccWrite(const MCInst &Inst);
+ bool validateDivScale(const MCInst &Inst);
unsigned getConstantBusLimit(unsigned Opcode) const;
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
return true;
}
+// Validate VOP3B v_div_scale_* instructions: the abs source modifier is not
+// encodable for these opcodes, so reject any source operand carrying it.
+// Returns true if the instruction is valid (or not a div_scale at all).
+bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
+  switch (Inst.getOpcode()) {
+  default:
+    return true;
+  case V_DIV_SCALE_F32_gfx6_gfx7:
+  case V_DIV_SCALE_F32_vi:
+  case V_DIV_SCALE_F32_gfx10:
+  case V_DIV_SCALE_F64_gfx6_gfx7:
+  case V_DIV_SCALE_F64_vi:
+  case V_DIV_SCALE_F64_gfx10:
+    break;
+  }
+
+  // TODO: Check that src0 = src1 or src2.
+
+  // Check all three source-modifier operands.  (Fixed: the original listed
+  // src2_modifiers twice and never checked src1_modifiers, so an abs
+  // modifier on src1 was not diagnosed.)
+  for (auto Name : {AMDGPU::OpName::src0_modifiers,
+                    AMDGPU::OpName::src1_modifiers,
+                    AMDGPU::OpName::src2_modifiers}) {
+    if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
+            .getImm() &
+        SISrcMods::ABS) {
+      Error(getLoc(), "ABS not allowed in VOP3B instructions");
+      return false;
+    }
+  }
+
+  return true;
+}
+
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
if (!validateMAIAccWrite(Inst)) {
return false;
}
+ if (!validateDivScale(Inst)) {
+ return false;
+ }
return true;
}
// Satisfy the operand register constraint when one of the inputs is
// undefined. Ordinarily each undef value will have its own implicit_def of
// a vreg, so force these to use a single register.
- SDValue Src0 = Node->getOperand(0);
- SDValue Src1 = Node->getOperand(1);
- SDValue Src2 = Node->getOperand(2);
+ SDValue Src0 = Node->getOperand(1);
+ SDValue Src1 = Node->getOperand(3);
+ SDValue Src2 = Node->getOperand(5);
if ((Src0.isMachineOpcode() &&
Src0.getMachineOpcode() != AMDGPU::IMPLICIT_DEF) &&
} else
break;
- SmallVector<SDValue, 4> Ops = { Src0, Src1, Src2 };
- for (unsigned I = 3, N = Node->getNumOperands(); I != N; ++I)
- Ops.push_back(Node->getOperand(I));
-
+ SmallVector<SDValue, 9> Ops(Node->op_begin(), Node->op_end());
+ Ops[1] = Src0;
+ Ops[3] = Src1;
+ Ops[5] = Src2;
Ops.push_back(ImpDef.getValue(1));
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
return false;
}
}
+ if ((getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() &
+ SISrcMods::ABS) ||
+ (getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm() &
+ SISrcMods::ABS) ||
+ (getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() &
+ SISrcMods::ABS)) {
+ ErrInfo = "ABS not allowed in VOP3B instructions";
+ return false;
+ }
}
if (isSOP2(MI) || isSOPC(MI)) {
}
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
-  // v_div_scale_{f32|f64} do not support input modifiers.
-  let HasModifiers = 0;
-  let HasClamp = 0;
-  let HasOMod = 0;
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
+  // VOP3B now prints/parses neg source modifiers plus clamp and omod;
+  // abs remains disallowed and is rejected by the assembler.
+  let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
}
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it does.
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
let SchedRW = [WriteFloatFMA, WriteSALU];
- let AsmMatchConverter = "";
}
// Double precision division pre-scale.
def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
let SchedRW = [WriteDouble, WriteSALU];
- let AsmMatchConverter = "";
let FPDPRounding = 1;
}
} // End mayRaiseFPException = 0
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32 [[COPY2]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
- ; GCN: %8:vgpr_32, %9:sreg_64 = nofpexcept V_DIV_SCALE_F32 [[COPY1]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
+ ; GCN: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+ ; GCN: %8:vgpr_32, %9:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN: %10:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32 [[COPY2]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
- ; GCN: %8:vgpr_32, %9:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32 [[COPY1]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
+ ; GCN: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+ ; GCN: %8:vgpr_32, %9:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN: %10:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
S_BRANCH %bb.3
bb.3:
- $vgpr4, $vcc = V_DIV_SCALE_F32 $vgpr1, $vgpr1, $vgpr3, implicit $mode, implicit $exec
+ $vgpr4, $vcc = V_DIV_SCALE_F32 0, $vgpr1, 0, $vgpr1, 0, $vgpr3, 0, 0, implicit $mode, implicit $exec
$vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
S_ENDPGM 0
; SI-LABEL: {{^}}test_div_scale_f32_fneg_num:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[NEG_A]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], -[[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_fneg_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
; SI-LABEL: {{^}}test_div_scale_f32_fneg_den:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_xor_b32_e32 [[NEG_B:v[0-9]+]], 0x80000000, [[B]]
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[NEG_B]], [[NEG_B]], [[A]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], -[[B]], -[[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_fneg_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%87:vgpr_32 = IMPLICIT_DEF
%88:vgpr_32 = IMPLICIT_DEF
%90:vgpr_32 = IMPLICIT_DEF
- %91:vgpr_32, dead %92:sreg_64 = nofpexcept V_DIV_SCALE_F32 %90, %90, 1065353216, implicit $mode, implicit $exec
+ %91:vgpr_32, dead %92:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, %90, 0, %90, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%95:vgpr_32 = nofpexcept V_FMA_F32 0, 0, 0, 0, 0, undef %93:vgpr_32, 0, 0, implicit $mode, implicit $exec
- %96:vgpr_32, %97:sreg_64 = nofpexcept V_DIV_SCALE_F32 1065353216, %90, 1065353216, implicit $mode, implicit $exec
+ %96:vgpr_32, %97:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, 1065353216, 0, %90, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%98:vgpr_32 = IMPLICIT_DEF
%99:vgpr_32 = IMPLICIT_DEF
%100:vgpr_32 = IMPLICIT_DEF
%103:vgpr_32 = IMPLICIT_DEF
%104:vgpr_32 = IMPLICIT_DEF
%105:vgpr_32 = IMPLICIT_DEF
- %106:vgpr_32, dead %107:sreg_64 = nofpexcept V_DIV_SCALE_F32 %90, %90, %105, implicit $mode, implicit $exec
+ %106:vgpr_32, dead %107:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, %90, 0, %90, 0, %105, 0, 0, implicit $mode, implicit $exec
%108:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
%109:vgpr_32 = IMPLICIT_DEF
%110:vgpr_32 = nofpexcept V_FMA_F32 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- %111:vgpr_32, %112:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, 0, 0, implicit $mode, implicit $exec
+ %111:vgpr_32, %112:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%113:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %110, implicit $mode, implicit $exec
%114:vgpr_32 = IMPLICIT_DEF
%115:vgpr_32 = IMPLICIT_DEF
v_interp_p2_f16 v5, v2, attr1.x, v3 mul:2
// GFX67: error: instruction not supported on this GPU
// GFX89: error: invalid operand for instruction
+
+//
+// v_div_scale_*
+//
+
+v_div_scale_f32 v24, vcc, v22, v22, |v20|
+// GCN: error: ABS not allowed in VOP3B instructions
+
+v_div_scale_f64 v[24:25], vcc, -|v[22:23]|, v[22:23], v[20:21]
+// GCN: error: ABS not allowed in VOP3B instructions
// SICI: v_div_scale_f64 v[24:25], vcc, v[22:23], v[22:23], v[20:21] ; encoding: [0x18,0x6a,0xdc,0xd2,0x16,0x2d,0x52,0x04]
// VI: v_div_scale_f64 v[24:25], vcc, v[22:23], v[22:23], v[20:21] ; encoding: [0x18,0x6a,0xe1,0xd1,0x16,0x2d,0x52,0x04]
-v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], v[20:21]
-// SICI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], v[20:21] ; encoding: [0x18,0x0a,0xdc,0xd2,0x16,0x29,0x52,0x04]
-// VI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], v[20:21] ; encoding: [0x18,0x0a,0xe1,0xd1,0x16,0x29,0x52,0x04]
+v_div_scale_f64 v[24:25], s[10:11], -v[22:23], v[20:21], v[20:21] clamp
+// SICI: v_div_scale_f64 v[24:25], s[10:11], -v[22:23], v[20:21], v[20:21] clamp ; encoding: [0x18,0x0a,0xdc,0xd2,0x16,0x29,0x52,0x24]
+// VI: v_div_scale_f64 v[24:25], s[10:11], -v[22:23], v[20:21], v[20:21] clamp ; encoding: [0x18,0x8a,0xe1,0xd1,0x16,0x29,0x52,0x24]
+
+v_div_scale_f64 v[24:25], s[10:11], v[22:23], -v[20:21], v[20:21] clamp mul:2
+// SICI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], -v[20:21], v[20:21] clamp mul:2 ; encoding: [0x18,0x0a,0xdc,0xd2,0x16,0x29,0x52,0x4c]
+// VI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], -v[20:21], v[20:21] clamp mul:2 ; encoding: [0x18,0x8a,0xe1,0xd1,0x16,0x29,0x52,0x4c]
+
+v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], -v[20:21]
+// SICI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], -v[20:21] ; encoding: [0x18,0x0a,0xdc,0xd2,0x16,0x29,0x52,0x84]
+// VI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], -v[20:21] ; encoding: [0x18,0x0a,0xe1,0xd1,0x16,0x29,0x52,0x84]
v_div_scale_f32 v24, vcc, v22, v22, v20
// SICI: v_div_scale_f32 v24, vcc, v22, v22, v20 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0x52,0x04]
// VI: v_div_scale_f32 v24, vcc, v22, v22, v20 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0x52,0x04]
+v_div_scale_f32 v24, vcc, -v22, v22, v20
+// SICI: v_div_scale_f32 v24, vcc, -v22, v22, v20 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0x52,0x24]
+// VI: v_div_scale_f32 v24, vcc, -v22, v22, v20 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0x52,0x24]
+
+v_div_scale_f32 v24, vcc, v22, -v22, v20 clamp
+// SICI: v_div_scale_f32 v24, vcc, v22, -v22, v20 clamp ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0x52,0x44]
+// VI: v_div_scale_f32 v24, vcc, v22, -v22, v20 clamp ; encoding: [0x18,0xea,0xe0,0xd1,0x16,0x2d,0x52,0x44]
+
+v_div_scale_f32 v24, vcc, v22, v22, -v20 clamp div:2
+// SICI: v_div_scale_f32 v24, vcc, v22, v22, -v20 clamp div:2 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0x52,0x9c]
+// VI: v_div_scale_f32 v24, vcc, v22, v22, -v20 clamp div:2 ; encoding: [0x18,0xea,0xe0,0xd1,0x16,0x2d,0x52,0x9c]
+
v_div_scale_f32 v24, s[10:11], v22, v22, v20
// SICI: v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xda,0xd2,0x16,0x2d,0x52,0x04]
// VI: v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xe0,0xd1,0x16,0x2d,0x52,0x04]