#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600Subtarget.h"
+#include "R600TargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
setHasExtractBitsInsn(true);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs)
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
+ case ISD::ADDRSPACECAST:
+ return lowerADDRSPACECAST(Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID =
DAG.getCondCode(ISD::SETNE));
}
+/// Custom lowering for ISD::ADDRSPACECAST.
+///
+/// The only case given special treatment here is a cast of a constant
+/// null pointer out of the flat address space: the destination address
+/// space may use a different sentinel for null (queried from the target
+/// machine via getNullPointerValue()), so the node is folded to that
+/// constant.  Every other addrspacecast is returned unchanged.
+SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  EVT VT = Op.getValueType();
+
+  // The null-pointer encodings per address space live on the target
+  // machine, so downcast from the generic TargetMachine.
+  const R600TargetMachine &TM =
+      static_cast<const R600TargetMachine &>(getTargetMachine());
+
+  const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
+  unsigned SrcAS = ASC->getSrcAddressSpace();
+  unsigned DestAS = ASC->getDestAddressSpace();
+
+  // Fold "addrspacecast (flat null) to AS" into AS's own null value
+  // (e.g. local null may not be the bit pattern 0).
+  // NOTE(review): isNullValue() is the older spelling of
+  // ConstantSDNode::isZero() — confirm it exists at this LLVM revision.
+  if (auto *ConstSrc = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+    if (SrcAS == AMDGPUAS::FLAT_ADDRESS && ConstSrc->isNullValue())
+      return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT);
+  }
+
+  // Non-null (or non-flat-source) casts: hand the node back unmodified.
+  return Op;
+}
+
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck %s
+
+; Casting a flat pointer argument to global lowers to a plain register
+; move of the pointer bits (the MOV of KC0[2].Z below) — no conversion
+; code is emitted.
+define amdgpu_kernel void @addrspacecast_flat_to_global(ptr addrspace(1) %out, ptr %src.ptr) {
+; CHECK-LABEL: addrspacecast_flat_to_global:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; CHECK-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CHECK-NEXT: CF_END
+; CHECK-NEXT: PAD
+; CHECK-NEXT: ALU clause starting at 4:
+; CHECK-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CHECK-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CHECK-NEXT: MOV * T1.X, KC0[2].Z,
+  %cast = addrspacecast ptr %src.ptr to ptr addrspace(1)
+  store ptr addrspace(1) %cast, ptr addrspace(1) %out
+  ret void
+}
+
+; The reverse direction (global -> flat) likewise produces identical
+; code to the flat -> global case above: just a MOV of the source
+; pointer, confirming the cast is a no-op in both directions.
+define amdgpu_kernel void @addrspacecast_global_to_flat(ptr addrspace(1) %out, ptr addrspace(1) %src.ptr) {
+; CHECK-LABEL: addrspacecast_global_to_flat:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
+; CHECK-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CHECK-NEXT: CF_END
+; CHECK-NEXT: PAD
+; CHECK-NEXT: ALU clause starting at 4:
+; CHECK-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CHECK-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CHECK-NEXT: MOV * T1.X, KC0[2].Z,
+  %cast = addrspacecast ptr addrspace(1) %src.ptr to ptr
+  store ptr %cast, ptr addrspace(1) %out
+  ret void
+}
+
+; Flat null cast to the local address space must materialize local's own
+; null value: the checks show literal -1 (displayed as "-1(nan)") being
+; stored, not 0.
+define amdgpu_kernel void @addrspacecast_flat_null_to_local(ptr addrspace(1) %out) {
+; CHECK-LABEL: addrspacecast_flat_null_to_local:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
+; CHECK-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CHECK-NEXT: CF_END
+; CHECK-NEXT: PAD
+; CHECK-NEXT: ALU clause starting at 4:
+; CHECK-NEXT: MOV * T0.X, literal.x,
+; CHECK-NEXT: -1(nan), 0(0.000000e+00)
+; CHECK-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CHECK-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+  store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(1) %out
+  ret void
+}
+
+; Flat null cast to global stays the literal 0 — global shares flat's
+; null representation, unlike local above.
+define amdgpu_kernel void @addrspacecast_flat_null_to_global(ptr addrspace(1) %out) {
+; CHECK-LABEL: addrspacecast_flat_null_to_global:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
+; CHECK-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CHECK-NEXT: CF_END
+; CHECK-NEXT: PAD
+; CHECK-NEXT: ALU clause starting at 4:
+; CHECK-NEXT: MOV * T0.X, literal.x,
+; CHECK-NEXT: 0(0.000000e+00), 0(0.000000e+00)
+; CHECK-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CHECK-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+  store ptr addrspace(1) addrspacecast (ptr null to ptr addrspace(1)), ptr addrspace(1) %out
+  ret void
+}
+
+; Casting undef does not crash the new lowering; the checks show no ALU
+; or store instructions at all — presumably the store of the undef value
+; is deleted during selection/legalization.
+define amdgpu_kernel void @addrspacecast_flat_undef_to_local(ptr addrspace(1) %out) {
+; CHECK-LABEL: addrspacecast_flat_undef_to_local:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: CF_END
+; CHECK-NEXT: PAD
+  store ptr addrspace(3) addrspacecast (ptr undef to ptr addrspace(3)), ptr addrspace(1) %out
+  ret void
+}
+
+; Same as the undef case: a poison source folds away entirely, leaving
+; an empty program body.
+define amdgpu_kernel void @addrspacecast_flat_poison_to_local(ptr addrspace(1) %out) {
+; CHECK-LABEL: addrspacecast_flat_poison_to_local:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: CF_END
+; CHECK-NEXT: PAD
+  store ptr addrspace(3) addrspacecast (ptr poison to ptr addrspace(3)), ptr addrspace(1) %out
+  ret void
+}