AMDGPU: Add intrinsic for s_setreg
authorMatt Arsenault <Matthew.Arsenault@amd.com>
Wed, 6 May 2020 20:43:33 +0000 (16:43 -0400)
committerMatt Arsenault <Matthew.Arsenault@amd.com>
Thu, 28 May 2020 18:26:38 +0000 (14:26 -0400)
This will be more useful with fenv access implemented.

clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/test/CodeGenOpenCL/builtins-amdgcn.cl
clang/test/SemaOpenCL/builtins-amdgcn-error.cl
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SOPInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll [new file with mode: 0644]
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll [new file with mode: 0644]

index 5633ccd5d744cc116ec17696226519fb7b26bff6..28379142b05adc8df65e0bbbee9fc7ab46da421b 100644 (file)
@@ -44,6 +44,7 @@ BUILTIN(__builtin_amdgcn_mbcnt_lo, "UiUiUi", "nc")
 // Instruction builtins.
 //===----------------------------------------------------------------------===//
 BUILTIN(__builtin_amdgcn_s_getreg, "UiIi", "n")
+BUILTIN(__builtin_amdgcn_s_setreg, "vIiUi", "n")
 BUILTIN(__builtin_amdgcn_s_getpc, "LUi", "n")
 BUILTIN(__builtin_amdgcn_s_waitcnt, "vIi", "n")
 BUILTIN(__builtin_amdgcn_s_sendmsg, "vIiUi", "n")
index 8f2f149103b340c12fd6f33e707ef90a54a784d9..3563ad464c66acf193cf94475c0d2af9323867ef 100644 (file)
@@ -715,6 +715,12 @@ kernel void test_mqsad_u32_u8(global uint4* out, ulong src0, uint src1, uint4 sr
   *out = __builtin_amdgcn_mqsad_u32_u8(src0, src1, src2);
 }
 
+// CHECK-LABEL: test_s_setreg(
+// CHECK: call void @llvm.amdgcn.s.setreg(i32 8193, i32 %val)
+kernel void test_s_setreg(uint val) {
+  __builtin_amdgcn_s_setreg(8193, val);
+}
+
 // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
 // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
 // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
index ad5e8776b2e871c1755738afe0c61d0cdc606ec3..dbe2900b600bf2114facd1d74ea9f9c8e7191c3c 100644 (file)
@@ -139,3 +139,8 @@ void test_fence() {
   const char ptr[] = "workgroup";
   __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
 }
+
+void test_s_setreg(int x, int y) {
+  __builtin_amdgcn_s_setreg(x, 0); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
+  __builtin_amdgcn_s_setreg(x, y); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
+}
index e2d8f3cb1bd606d2e5b5f64133fa54a8555070ca..40449304ed04f2eb3ba2ae9db5185438a160bd50 100644 (file)
@@ -1207,6 +1207,16 @@ def int_amdgcn_s_getreg :
   [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg<ArgIndex<0>>]
 >;
 
+// Note this can be used to set FP environment properties that are
+// unsafe to change in non-strictfp functions. The register properties
+// available (and value required to access them) may differ per
+// subtarget. llvm.amdgcn.s.setreg(hwmode, value)
+def int_amdgcn_s_setreg :
+  GCCBuiltin<"__builtin_amdgcn_s_setreg">,
+  Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]
+>;
+
 // int_amdgcn_s_getpc is provided to allow a specific style of position
 // independent code to determine the high part of its address when it is
 // known (through convention) that the code and any data of interest does
index 3b8f88271458d989e8b3252529bd22ddff872723..59f9866b93b65042170784977f6e8b8d029a1fd3 100644 (file)
@@ -202,13 +202,6 @@ def AMDGPUSetCCOp : SDTypeProfile<1, 3, [        // setcc
 
 def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
 
-def AMDGPUSetRegOp :  SDTypeProfile<0, 2, [
-  SDTCisInt<0>, SDTCisInt<1>
-]>;
-
-def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [
-  SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;
-
 def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [
    SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
index f11563a66d4106bad11b77454b91e3a2635ff053..c6e0cb2b9cfa11e2f997f61d5b196405d4042797 100644 (file)
@@ -2783,6 +2783,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
       constrainOpWithReadfirstlane(MI, MRI, 2); // M0
       return;
     }
+    case Intrinsic::amdgcn_s_setreg: {
+      constrainOpWithReadfirstlane(MI, MRI, 2);
+      return;
+    }
     default: {
       if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
               AMDGPU::lookupRsrcIntrinsic(IntrID)) {
@@ -3924,6 +3928,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
       break;
     }
+    case Intrinsic::amdgcn_s_setreg: {
+      // This must be an SGPR, but accept a VGPR.
+      unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+                                   AMDGPU::SGPRRegBankID);
+      OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
+      break;
+    }
     case Intrinsic::amdgcn_end_cf:
     case Intrinsic::amdgcn_init_exec: {
       unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
index 042087ec5a4dedf829161e30ffdbaff601831cbe..3b8930c433a3ad2ddb3cecd11d9fc4117b087b31 100644 (file)
@@ -7977,32 +7977,32 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
   const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
                                (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
                                (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
-  const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16);
+  const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
 
   const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
 
   if (!HasFP32Denormals) {
     SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
 
-    SDValue EnableDenorm;
+    SDNode *EnableDenorm;
     if (Subtarget->hasDenormModeInst()) {
       const SDValue EnableDenormValue =
           getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
 
       EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
-                                 DAG.getEntryNode(), EnableDenormValue);
+                                 DAG.getEntryNode(), EnableDenormValue).getNode();
     } else {
       const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
                                                         SL, MVT::i32);
-      EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs,
-                                 DAG.getEntryNode(), EnableDenormValue,
-                                 BitField);
+      EnableDenorm =
+          DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
+                             {EnableDenormValue, BitField, DAG.getEntryNode()});
     }
 
     SDValue Ops[3] = {
       NegDivScale0,
-      EnableDenorm.getValue(0),
-      EnableDenorm.getValue(1)
+      SDValue(EnableDenorm, 0),
+      SDValue(EnableDenorm, 1)
     };
 
     NegDivScale0 = DAG.getMergeValues(Ops, SL);
@@ -8026,25 +8026,25 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
                              NumeratorScaled, Fma3);
 
   if (!HasFP32Denormals) {
-    SDValue DisableDenorm;
+    SDNode *DisableDenorm;
     if (Subtarget->hasDenormModeInst()) {
       const SDValue DisableDenormValue =
           getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
 
       DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
                                   Fma4.getValue(1), DisableDenormValue,
-                                  Fma4.getValue(2));
+                                  Fma4.getValue(2)).getNode();
     } else {
       const SDValue DisableDenormValue =
           DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
 
-      DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other,
-                                  Fma4.getValue(1), DisableDenormValue,
-                                  BitField, Fma4.getValue(2));
+      DisableDenorm = DAG.getMachineNode(
+          AMDGPU::S_SETREG_B32, SL, MVT::Other,
+          {DisableDenormValue, BitField, Fma4.getValue(1), Fma4.getValue(2)});
     }
 
     SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
-                                      DisableDenorm, DAG.getRoot());
+                                      SDValue(DisableDenorm, 0), DAG.getRoot());
     DAG.setRoot(OutputChain);
   }
 
index 62b7f8318fd027b386be95fc9e703abe0d3dd788..529e80e679689affd89cfdd3c4b6d4eabb11e653 100644 (file)
@@ -1131,7 +1131,7 @@ def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
 def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
 def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
 
-def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
+def hwreg : NamedOperandU32<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
 
 def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
 
index 7b8c2c27b806337080d8bfc91c8dd4182dceaf37..dbafea5a1347ec829312442646aae14fb8faaa97 100644 (file)
@@ -801,13 +801,13 @@ def S_GETREG_B32 : SOPK_Pseudo <
 >;
 }
 
-let hasSideEffects = 1 in {
+let hasSideEffects = 1, mayLoad = 0, mayStore =0 in {
 
 def S_SETREG_B32 : SOPK_Pseudo <
   "s_setreg_b32",
   (outs), (ins SReg_32:$sdst, hwreg:$simm16),
   "$simm16, $sdst",
-  [(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))]> {
+  [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
   let Defs = [MODE];
   let Uses = [MODE];
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
new file mode 100644 (file)
index 0000000..85ed95e
--- /dev/null
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; Set FP32 fp_round to round to zero
+define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() {
+; GCN-LABEL: test_setreg_f32_round_mode_rtz:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 3), 3
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.setreg(i32 4097, i32 3)
+  ret void
+}
+
+; Set FP64/FP16 fp_round to round to zero
+define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() {
+; GCN-LABEL: test_setreg_f64_round_mode_rtz:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 3), 3
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.setreg(i32 4225, i32 3)
+  ret void
+}
+
+; Set all fp_round to round to zero
+define amdgpu_kernel void @test_setreg_all_round_mode_rtz() {
+; GCN-LABEL: test_setreg_all_round_mode_rtz:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 5), 7
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.setreg(i32 8193, i32 7)
+  ret void
+}
+
+; Set FP32 fp_round to dynamic mode
+define amdgpu_cs void @test_setreg_roundingmode_var(i32 inreg %var.mode) {
+; GCN-LABEL: test_setreg_roundingmode_var:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s0
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
+  ret void
+}
+
+define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
+; GCN-LABEL: test_setreg_roundingmode_var_vgpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_readfirstlane_b32 s4, v0
+; GCN-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s4
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
+  ret void
+}
+
+declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
new file mode 100644 (file)
index 0000000..88e6bd4
--- /dev/null
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; FIXME: This copy of the test is a subset of the -global-isel version, since the VGPR case doesn't work.
+
+; Set FP32 fp_round to round to zero
+define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() {
+; GCN-LABEL: test_setreg_f32_round_mode_rtz:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 3), 3
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.setreg(i32 4097, i32 3)
+  ret void
+}
+
+; Set FP64/FP16 fp_round to round to zero
+define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() {
+; GCN-LABEL: test_setreg_f64_round_mode_rtz:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 3), 3
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.setreg(i32 4225, i32 3)
+  ret void
+}
+
+; Set all fp_round to round to zero
+define amdgpu_kernel void @test_setreg_all_round_mode_rtz() {
+; GCN-LABEL: test_setreg_all_round_mode_rtz:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 5), 7
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.setreg(i32 8193, i32 7)
+  ret void
+}
+
+; Set FP32 fp_round to dynamic mode
+define amdgpu_cs void @test_setreg_roundingmode_var(i32 inreg %var.mode) {
+; GCN-LABEL: test_setreg_roundingmode_var:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s0
+; GCN-NEXT:    s_endpgm
+  call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
+  ret void
+}
+
+; FIXME: Broken for DAG
+; define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
+;   call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
+;   ret void
+; }
+
+declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0
+
+attributes #0 = { nounwind }