bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
+ bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
+ bool AllowAbs = true) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
+ bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
unsigned Opc
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
+ // omod
+ SDValue Ops[8];
+ SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
+ SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
+ SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
}
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
- unsigned &Mods) const {
+ unsigned &Mods,
+ bool AllowAbs) const {
Mods = 0;
Src = In;
Src = Src.getOperand(0);
}
- if (Src.getOpcode() == ISD::FABS) {
+ if (AllowAbs && Src.getOpcode() == ISD::FABS) {
Mods |= SISrcMods::ABS;
Src = Src.getOperand(0);
}
return false;
}
+// Select a source operand plus its modifier mask for a VOP3B instruction.
+// Same as SelectVOP3Mods except FABS folding is disabled: abs is not
+// allowed on VOP3B instructions (neg may still be folded).
+bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
+                                         SDValue &SrcMods) const {
+  unsigned Mods;
+  if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
+    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+    return true;
+  }
+
+  return false;
+}
+
// Select source + modifiers like SelectVOP3Mods.
// NOTE(review): the original body called SelectVOP3Mods twice with identical
// arguments; the first call's Src/SrcMods results were immediately
// overwritten by the second, so the redundant call has been removed.
// TODO confirm against upstream that an intended no-NaN check on Src was not
// lost when this hunk was produced — the _NNaN suffix suggests one.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  return SelectVOP3Mods(In, Src, SrcMods);
}
+// VOP3B variant of SelectVOP3Mods0: selects src/src_mods with abs folding
+// disabled, and supplies default (zero) clamp and omod operands so the
+// instruction carries the full VOP3 operand list.
+bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
+                                          SDValue &SrcMods, SDValue &Clamp,
+                                          SDValue &Omod) const {
+  SDLoc DL(In);
+  // Clamp and omod are always selected as 0 here; the instruction printer /
+  // assembler can still encode non-default values written by hand.
+  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
+
+  return SelectVOP3BMods(In, Src, SrcMods);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
SDValue &Clamp, SDValue &Omod) const {
Src = In;
else
return false;
+ // TODO: Match source modifiers.
+
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), Dst0)
.addDef(Dst1)
- .addUse(Src0)
- .addUse(Denom)
- .addUse(Numer);
+ .addImm(0) // $src0_modifiers
+ .addUse(Src0) // $src0
+ .addImm(0) // $src1_modifiers
+ .addUse(Denom) // $src1
+ .addImm(0) // $src2_modifiers
+ .addUse(Numer) // $src2
+ .addImm(0) // $clamp
+ .addImm(0); // $omod
MI.eraseFromParent();
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
std::pair<Register, unsigned>
-AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root) const {
+AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
+ bool AllowAbs) const {
Register Src = Root.getReg();
Register OrigSrc = Src;
unsigned Mods = 0;
MI = getDefIgnoringCopies(Src, *MRI);
}
- if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
+ if (AllowAbs && MI && MI->getOpcode() == AMDGPU::G_FABS) {
Src = MI->getOperand(1).getReg();
Mods |= SISrcMods::ABS;
}
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
+  // VOP3B variant of selectVOP3Mods0: fold source modifiers but disallow
+  // abs (abs is not permitted on VOP3B instructions), and render default
+  // (zero) clamp/omod operands.
+  Register Src;
+  unsigned Mods;
+  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);
+
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
+  }};
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
return {{
[=](MachineInstrBuilder &MIB) { MIB.add(Root); },
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
+  // VOP3B variant of selectVOP3Mods: folds neg source modifiers but not
+  // abs, which is not allowed on VOP3B instructions.
+  Register Src;
+  unsigned Mods;
+  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);
+
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+  }};
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
Register Reg = Root.getReg();
const MachineInstr *Def = getDefIgnoringCopies(Reg, *MRI);
bool selectGlobalAtomicFaddIntrinsic(MachineInstr &I) const;
bool selectBVHIntrinsic(MachineInstr &I) const;
- std::pair<Register, unsigned>
- selectVOP3ModsImpl(MachineOperand &Root) const;
+ std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
+ bool AllowAbs = true) const;
InstructionSelector::ComplexRendererFns
selectVCSRC(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3Mods0(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
+ selectVOP3BMods0(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
selectVOP3OMods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3Mods(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectVOP3BMods(MachineOperand &Root) const;
ComplexRendererFns selectVOP3NoMods(MachineOperand &Root) const;
bool validateVccOperand(unsigned Reg) const;
bool validateVOP3Literal(const MCInst &Inst) const;
bool validateMAIAccWrite(const MCInst &Inst);
+ bool validateDivScale(const MCInst &Inst);
unsigned getConstantBusLimit(unsigned Opcode) const;
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
return true;
}
+// Validate VOP3B v_div_scale_* instructions: the abs source modifier is not
+// encodable for these opcodes, so reject any source operand carrying it.
+// Returns true if the instruction is valid (or not a div_scale at all).
+bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
+  switch (Inst.getOpcode()) {
+  default:
+    return true;
+  case V_DIV_SCALE_F32_gfx6_gfx7:
+  case V_DIV_SCALE_F32_vi:
+  case V_DIV_SCALE_F32_gfx10:
+  case V_DIV_SCALE_F64_gfx6_gfx7:
+  case V_DIV_SCALE_F64_vi:
+  case V_DIV_SCALE_F64_gfx10:
+    break;
+  }
+
+  // TODO: Check that src0 = src1 or src2.
+
+  // Check all three source-modifier operands.  (Fixed: the original listed
+  // src2_modifiers twice and never checked src1_modifiers, so an abs
+  // modifier on src1 was not diagnosed.)
+  for (auto Name : {AMDGPU::OpName::src0_modifiers,
+                    AMDGPU::OpName::src1_modifiers,
+                    AMDGPU::OpName::src2_modifiers}) {
+    if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
+            .getImm() &
+        SISrcMods::ABS) {
+      Error(getLoc(), "ABS not allowed in VOP3B instructions");
+      return false;
+    }
+  }
+
+  return true;
+}
+
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
if (!validateMAIAccWrite(Inst)) {
return false;
}
+ if (!validateDivScale(Inst)) {
+ return false;
+ }
return true;
}
// Satisfy the operand register constraint when one of the inputs is
// undefined. Ordinarily each undef value will have its own implicit_def of
// a vreg, so force these to use a single register.
- SDValue Src0 = Node->getOperand(0);
- SDValue Src1 = Node->getOperand(1);
- SDValue Src2 = Node->getOperand(2);
+ SDValue Src0 = Node->getOperand(1);
+ SDValue Src1 = Node->getOperand(3);
+ SDValue Src2 = Node->getOperand(5);
if ((Src0.isMachineOpcode() &&
Src0.getMachineOpcode() != AMDGPU::IMPLICIT_DEF) &&
} else
break;
- SmallVector<SDValue, 4> Ops = { Src0, Src1, Src2 };
- for (unsigned I = 3, N = Node->getNumOperands(); I != N; ++I)
- Ops.push_back(Node->getOperand(I));
-
+ SmallVector<SDValue, 9> Ops(Node->op_begin(), Node->op_end());
+ Ops[1] = Src0;
+ Ops[3] = Src1;
+ Ops[5] = Src2;
Ops.push_back(ImpDef.getValue(1));
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
return false;
}
}
+ if ((getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() &
+ SISrcMods::ABS) ||
+ (getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm() &
+ SISrcMods::ABS) ||
+ (getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() &
+ SISrcMods::ABS)) {
+ ErrInfo = "ABS not allowed in VOP3B instructions";
+ return false;
+ }
}
if (isSOP2(MI) || isSOPC(MI)) {
}
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
-  // v_div_scale_{f32|f64} do not support input modifiers.
-  let HasModifiers = 0;
-  let HasClamp = 0;
-  let HasOMod = 0;
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
+  // VOP3B now prints/parses neg source modifiers plus clamp and omod;
+  // abs remains disallowed and is rejected by the assembler.
+  let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
}
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it does.
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
let SchedRW = [WriteFloatFMA, WriteSALU];
- let AsmMatchConverter = "";
}
// Double precision division pre-scale.
def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
let SchedRW = [WriteDouble, WriteSALU];
- let AsmMatchConverter = "";
let FPDPRounding = 1;
}
} // End mayRaiseFPException = 0
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32 [[COPY2]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
- ; GCN: %8:vgpr_32, %9:sreg_64 = nofpexcept V_DIV_SCALE_F32 [[COPY1]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
+ ; GCN: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+ ; GCN: %8:vgpr_32, %9:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN: %10:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32 [[COPY2]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
- ; GCN: %8:vgpr_32, %9:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32 [[COPY1]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
+ ; GCN: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+ ; GCN: %8:vgpr_32, %9:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN: %10:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
S_BRANCH %bb.3
bb.3:
- $vgpr4, $vcc = V_DIV_SCALE_F32 $vgpr1, $vgpr1, $vgpr3, implicit $mode, implicit $exec
+ $vgpr4, $vcc = V_DIV_SCALE_F32 0, $vgpr1, 0, $vgpr1, 0, $vgpr3, 0, 0, implicit $mode, implicit $exec
$vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
S_ENDPGM 0
; SI-LABEL: {{^}}test_div_scale_f32_fneg_num:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[NEG_A]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], -[[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_fneg_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
; SI-LABEL: {{^}}test_div_scale_f32_fneg_den:
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_xor_b32_e32 [[NEG_B:v[0-9]+]], 0x80000000, [[B]]
-; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[NEG_B]], [[NEG_B]], [[A]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], -[[B]], -[[B]], [[A]]
; SI: buffer_store_dword [[RESULT0]]
; SI: s_endpgm
define amdgpu_kernel void @test_div_scale_f32_fneg_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%87:vgpr_32 = IMPLICIT_DEF
%88:vgpr_32 = IMPLICIT_DEF
%90:vgpr_32 = IMPLICIT_DEF
- %91:vgpr_32, dead %92:sreg_64 = nofpexcept V_DIV_SCALE_F32 %90, %90, 1065353216, implicit $mode, implicit $exec
+ %91:vgpr_32, dead %92:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, %90, 0, %90, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%95:vgpr_32 = nofpexcept V_FMA_F32 0, 0, 0, 0, 0, undef %93:vgpr_32, 0, 0, implicit $mode, implicit $exec
- %96:vgpr_32, %97:sreg_64 = nofpexcept V_DIV_SCALE_F32 1065353216, %90, 1065353216, implicit $mode, implicit $exec
+ %96:vgpr_32, %97:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, 1065353216, 0, %90, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%98:vgpr_32 = IMPLICIT_DEF
%99:vgpr_32 = IMPLICIT_DEF
%100:vgpr_32 = IMPLICIT_DEF
%103:vgpr_32 = IMPLICIT_DEF
%104:vgpr_32 = IMPLICIT_DEF
%105:vgpr_32 = IMPLICIT_DEF
- %106:vgpr_32, dead %107:sreg_64 = nofpexcept V_DIV_SCALE_F32 %90, %90, %105, implicit $mode, implicit $exec
+ %106:vgpr_32, dead %107:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, %90, 0, %90, 0, %105, 0, 0, implicit $mode, implicit $exec
%108:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
%109:vgpr_32 = IMPLICIT_DEF
%110:vgpr_32 = nofpexcept V_FMA_F32 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- %111:vgpr_32, %112:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, 0, 0, implicit $mode, implicit $exec
+ %111:vgpr_32, %112:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%113:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %110, implicit $mode, implicit $exec
%114:vgpr_32 = IMPLICIT_DEF
%115:vgpr_32 = IMPLICIT_DEF
v_interp_p2_f16 v5, v2, attr1.x, v3 mul:2
// GFX67: error: instruction not supported on this GPU
// GFX89: error: invalid operand for instruction
+
+//
+// v_div_scale_*
+//
+
+v_div_scale_f32 v24, vcc, v22, v22, |v20|
+// GCN: error: ABS not allowed in VOP3B instructions
+
+v_div_scale_f64 v[24:25], vcc, -|v[22:23]|, v[22:23], v[20:21]
+// GCN: error: ABS not allowed in VOP3B instructions
// SICI: v_div_scale_f64 v[24:25], vcc, v[22:23], v[22:23], v[20:21] ; encoding: [0x18,0x6a,0xdc,0xd2,0x16,0x2d,0x52,0x04]
// VI: v_div_scale_f64 v[24:25], vcc, v[22:23], v[22:23], v[20:21] ; encoding: [0x18,0x6a,0xe1,0xd1,0x16,0x2d,0x52,0x04]
-v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], v[20:21]
-// SICI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], v[20:21] ; encoding: [0x18,0x0a,0xdc,0xd2,0x16,0x29,0x52,0x04]
-// VI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], v[20:21] ; encoding: [0x18,0x0a,0xe1,0xd1,0x16,0x29,0x52,0x04]
+v_div_scale_f64 v[24:25], s[10:11], -v[22:23], v[20:21], v[20:21] clamp
+// SICI: v_div_scale_f64 v[24:25], s[10:11], -v[22:23], v[20:21], v[20:21] clamp ; encoding: [0x18,0x0a,0xdc,0xd2,0x16,0x29,0x52,0x24]
+// VI: v_div_scale_f64 v[24:25], s[10:11], -v[22:23], v[20:21], v[20:21] clamp ; encoding: [0x18,0x8a,0xe1,0xd1,0x16,0x29,0x52,0x24]
+
+v_div_scale_f64 v[24:25], s[10:11], v[22:23], -v[20:21], v[20:21] clamp mul:2
+// SICI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], -v[20:21], v[20:21] clamp mul:2 ; encoding: [0x18,0x0a,0xdc,0xd2,0x16,0x29,0x52,0x4c]
+// VI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], -v[20:21], v[20:21] clamp mul:2 ; encoding: [0x18,0x8a,0xe1,0xd1,0x16,0x29,0x52,0x4c]
+
+v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], -v[20:21]
+// SICI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], -v[20:21] ; encoding: [0x18,0x0a,0xdc,0xd2,0x16,0x29,0x52,0x84]
+// VI: v_div_scale_f64 v[24:25], s[10:11], v[22:23], v[20:21], -v[20:21] ; encoding: [0x18,0x0a,0xe1,0xd1,0x16,0x29,0x52,0x84]
v_div_scale_f32 v24, vcc, v22, v22, v20
// SICI: v_div_scale_f32 v24, vcc, v22, v22, v20 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0x52,0x04]
// VI: v_div_scale_f32 v24, vcc, v22, v22, v20 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0x52,0x04]
+v_div_scale_f32 v24, vcc, -v22, v22, v20
+// SICI: v_div_scale_f32 v24, vcc, -v22, v22, v20 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0x52,0x24]
+// VI: v_div_scale_f32 v24, vcc, -v22, v22, v20 ; encoding: [0x18,0x6a,0xe0,0xd1,0x16,0x2d,0x52,0x24]
+
+v_div_scale_f32 v24, vcc, v22, -v22, v20 clamp
+// SICI: v_div_scale_f32 v24, vcc, v22, -v22, v20 clamp ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0x52,0x44]
+// VI: v_div_scale_f32 v24, vcc, v22, -v22, v20 clamp ; encoding: [0x18,0xea,0xe0,0xd1,0x16,0x2d,0x52,0x44]
+
+v_div_scale_f32 v24, vcc, v22, v22, -v20 clamp div:2
+// SICI: v_div_scale_f32 v24, vcc, v22, v22, -v20 clamp div:2 ; encoding: [0x18,0x6a,0xda,0xd2,0x16,0x2d,0x52,0x9c]
+// VI: v_div_scale_f32 v24, vcc, v22, v22, -v20 clamp div:2 ; encoding: [0x18,0xea,0xe0,0xd1,0x16,0x2d,0x52,0x9c]
+
v_div_scale_f32 v24, s[10:11], v22, v22, v20
// SICI: v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xda,0xd2,0x16,0x2d,0x52,0x04]
// VI: v_div_scale_f32 v24, s[10:11], v22, v22, v20 ; encoding: [0x18,0x0a,0xe0,0xd1,0x16,0x2d,0x52,0x04]