From: Tom Stellard Date: Tue, 23 Jul 2013 01:48:18 +0000 (+0000) Subject: R600: Use KCache for kernel arguments X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1e80309ebe672ca6302c0b6d400a72c25791cbc7;p=platform%2Fupstream%2Fllvm.git R600: Use KCache for kernel arguments Reviewed-by: Vincent Lejeune llvm-svn: 186918 --- diff --git a/llvm/lib/Target/R600/AMDGPU.h b/llvm/lib/Target/R600/AMDGPU.h index 7621422..fbf1fce 100644 --- a/llvm/lib/Target/R600/AMDGPU.h +++ b/llvm/lib/Target/R600/AMDGPU.h @@ -74,6 +74,12 @@ enum AddressSpaces { ADDRESS_NONE = 5, ///< Address space for unknown memory. PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0) PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1) + + // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this + // order to be able to dynamically index a constant buffer, for example: + // + // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx + CONSTANT_BUFFER_0 = 8, CONSTANT_BUFFER_1 = 9, CONSTANT_BUFFER_2 = 10, diff --git a/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp b/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp index e79ab3c..78036a8 100644 --- a/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -565,24 +565,11 @@ bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) { return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS); } -bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) const { - if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) { - return true; - } - - const DataLayout *DL = TM.getDataLayout(); - MachineMemOperand *MMO = N->getMemOperand(); - const Value *V = MMO->getValue(); - const Value *BV = GetUnderlyingObject(V, DL, 0); - if (MMO - && MMO->getValue() - && ((V && dyn_cast(V)) - || (BV && dyn_cast( - GetUnderlyingObject(MMO->getValue(), DL, 0))))) { - return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS); - } else { 
- return false; +bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const { + if (CbId == -1) { + return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS); } + return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_BUFFER_0 + CbId); } bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const { diff --git a/llvm/lib/Target/R600/R600ISelLowering.cpp b/llvm/lib/Target/R600/R600ISelLowering.cpp index 7f93f23..dd613d5 100644 --- a/llvm/lib/Target/R600/R600ISelLowering.cpp +++ b/llvm/lib/Target/R600/R600ISelLowering.cpp @@ -72,10 +72,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::LOAD, MVT::v2i32, Expand); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); - setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); - setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Expand); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom); setOperationAction(ISD::STORE, MVT::i8, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::v2i32, Expand); @@ -775,7 +775,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, unsigned DwordOffset) const { unsigned ByteOffset = DwordOffset * 4; PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUAS::PARAM_I_ADDRESS); + AMDGPUAS::CONSTANT_BUFFER_0); // We shouldn't be using an offset wider than 16-bits for implicit parameters. 
assert(isInt<16>(ByteOffset)); @@ -1219,40 +1219,20 @@ SDValue R600TargetLowering::LowerFormalArguments( AnalyzeFormalArguments(CCInfo, Ins); - Function::const_arg_iterator FuncArg = - DAG.getMachineFunction().getFunction()->arg_begin(); - for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) { + for (unsigned i = 0, e = Ins.size(); i < e; ++i) { CCValAssign &VA = ArgLocs[i]; EVT VT = VA.getLocVT(); - Type *ArgType = FuncArg->getType(); - unsigned ArgSizeInBits = ArgType->isPointerTy() ? - 32 : ArgType->getPrimitiveSizeInBits(); - unsigned ArgBytes = ArgSizeInBits >> 3; - EVT ArgVT; - if (ArgSizeInBits < VT.getSizeInBits()) { - assert(!ArgType->isFloatTy() && - "Extending floating point arguments not supported yet"); - ArgVT = MVT::getIntegerVT(ArgSizeInBits); - } else { - ArgVT = VT; - } - - ISD::LoadExtType LoadType = ISD::EXTLOAD; - if (Ins[i].Flags.isZExt()) { - LoadType = ISD::ZEXTLOAD; - } else if (Ins[i].Flags.isSExt()) { - LoadType = ISD::SEXTLOAD; - } PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUAS::PARAM_I_ADDRESS); + AMDGPUAS::CONSTANT_BUFFER_0); // The first 36 bytes of the input buffer contains information about // thread group and global sizes. - SDValue Arg = DAG.getExtLoad(LoadType, DL, VT, DAG.getRoot(), + SDValue Arg = DAG.getLoad(VT, DL, Chain, DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32), - MachinePointerInfo(UndefValue::get(PtrTy)), - ArgVT, false, false, ArgBytes); + MachinePointerInfo(UndefValue::get(PtrTy)), false, + false, false, 4); // 4 is the preferred alignment for + // the CONSTANT memory space. 
InVals.push_back(Arg); } return Chain; diff --git a/llvm/lib/Target/R600/R600Instructions.td b/llvm/lib/Target/R600/R600Instructions.td index 8a49a8d..632cbcf 100644 --- a/llvm/lib/Target/R600/R600Instructions.td +++ b/llvm/lib/Target/R600/R600Instructions.td @@ -313,7 +313,7 @@ class VTX_READ buffer_id, dag outs, list pattern> class LoadParamFrag : PatFrag < (ops node:$ptr), (load_type node:$ptr), - [{ return isParamLoad(dyn_cast(N)); }] + [{ return isConstantLoad(dyn_cast(N), 0); }] >; def load_param : LoadParamFrag; diff --git a/llvm/test/CodeGen/R600/128bit-kernel-args.ll b/llvm/test/CodeGen/R600/128bit-kernel-args.ll index 2fc8381..5c14270 100644 --- a/llvm/test/CodeGen/R600/128bit-kernel-args.ll +++ b/llvm/test/CodeGen/R600/128bit-kernel-args.ll @@ -2,7 +2,10 @@ ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK ; R600-CHECK: @v4i32_kernel_arg -; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 52 +; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y +; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z +; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W +; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X ; SI-CHECK: @v4i32_kernel_arg ; SI-CHECK: BUFFER_STORE_DWORDX4 define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { @@ -12,7 +15,10 @@ entry: } ; R600-CHECK: @v4f32_kernel_arg -; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 52 +; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y +; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z +; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W +; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X ; SI-CHECK: @v4f32_kernel_arg ; SI-CHECK: BUFFER_STORE_DWORDX4 define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float> %in) { diff --git a/llvm/test/CodeGen/R600/add.ll b/llvm/test/CodeGen/R600/add.ll index dd590e5..16f7f97 100644 --- a/llvm/test/CodeGen/R600/add.ll +++ b/llvm/test/CodeGen/R600/add.ll @@ -2,8 +2,8 @@ ; RUN: llc 
< %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s ;EG-CHECK: @test2 -;EG-CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal\.[xyzw]}} +;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test2 ;SI-CHECK: V_ADD_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} @@ -19,10 +19,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { } ;EG-CHECK: @test4 -;EG-CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG-CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;SI-CHECK: @test4 ;SI-CHECK: V_ADD_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} diff --git a/llvm/test/CodeGen/R600/bfi_int.ll b/llvm/test/CodeGen/R600/bfi_int.ll index a1bd09a..b001ad0 100644 --- a/llvm/test/CodeGen/R600/bfi_int.ll +++ b/llvm/test/CodeGen/R600/bfi_int.ll @@ -36,8 +36,8 @@ entry: ; SHA-256 Ma function ; ((x & z) | (y & (x | z))) ; R600-CHECK: @bfi_sha256_ma -; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], +; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W ; SI-CHECK: V_XOR_B32_e64 [[DST:VGPR[0-9]+]], 
{{[SV]GPR[0-9]+, [SV]GPR[0-9]+}} ; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}} diff --git a/llvm/test/CodeGen/R600/fdiv.ll b/llvm/test/CodeGen/R600/fdiv.ll index f6eb6a6..c581ec9 100644 --- a/llvm/test/CodeGen/R600/fdiv.ll +++ b/llvm/test/CodeGen/R600/fdiv.ll @@ -1,19 +1,17 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}} -;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}} -;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}} -;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}} +;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}} +;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}} +;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}} +;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}} +;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}} +;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}} +;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}} +;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}} -define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { - %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 - %a = load <4 x float> addrspace(1) * %in - %b = load <4 x float> addrspace(1) * %b_ptr - %result = fdiv <4 x float> %a, %b - store <4 x float> %result, <4 x float> addrspace(1)* %out +define void @test(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) { +entry: + %0 = fdiv <4 x float> %a, %b + store <4 x float> %0, <4 x float> addrspace(1)* %out ret void } diff --git a/llvm/test/CodeGen/R600/fp_to_sint.ll b/llvm/test/CodeGen/R600/fp_to_sint.ll index 5a608fd..77ab328 100644 --- a/llvm/test/CodeGen/R600/fp_to_sint.ll +++ b/llvm/test/CodeGen/R600/fp_to_sint.ll @@ -1,10 +1,10 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck 
%s ; CHECK: @fp_to_sint_v4i32 -; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %value = load <4 x float> addrspace(1) * %in diff --git a/llvm/test/CodeGen/R600/i8-to-double-to-float.ll b/llvm/test/CodeGen/R600/i8-to-double-to-float.ll index 708f2f4..6047466 100644 --- a/llvm/test/CodeGen/R600/i8-to-double-to-float.ll +++ b/llvm/test/CodeGen/R600/i8-to-double-to-float.ll @@ -1,6 +1,6 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} +;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) { %1 = load i8 addrspace(1)* %in diff --git a/llvm/test/CodeGen/R600/literals.ll b/llvm/test/CodeGen/R600/literals.ll index 21e5d4c..fbb77b3 100644 --- a/llvm/test/CodeGen/R600/literals.ll +++ b/llvm/test/CodeGen/R600/literals.ll @@ -2,12 +2,12 @@ ; Test using an integer literal constant. ; Generated ASM should be: -; ADD_INT REG literal.x, 5 +; ADD_INT KC0[2].Z literal.x, 5 ; or -; ADD_INT literal.x REG, 5 +; ADD_INT literal.x KC0[2].Z, 5 ; CHECK: @i32_literal -; CHECK: ADD_INT * {{[A-Z0-9,. ]*}}literal.x +; CHECK: ADD_INT * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x ; CHECK-NEXT: 5 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { entry: @@ -18,12 +18,12 @@ entry: ; Test using a float literal constant. 
; Generated ASM should be: -; ADD REG literal.x, 5.0 +; ADD KC0[2].Z literal.x, 5.0 ; or -; ADD literal.x REG, 5.0 +; ADD literal.x KC0[2].Z, 5.0 ; CHECK: @float_literal -; CHECK: ADD * {{[A-Z0-9,. ]*}}literal.x +; CHECK: ADD * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x ; CHECK-NEXT: 1084227584(5.0 define void @float_literal(float addrspace(1)* %out, float %in) { entry: diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.trunc.ll index cdc03f8..7627783 100644 --- a/llvm/test/CodeGen/R600/llvm.AMDGPU.trunc.ll +++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.trunc.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s ; R600-CHECK: @amdgpu_trunc -; R600-CHECK: TRUNC * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; R600-CHECK: TRUNC * T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI-CHECK: @amdgpu_trunc ; SI-CHECK: V_TRUNC_F32 diff --git a/llvm/test/CodeGen/R600/load.vec.ll b/llvm/test/CodeGen/R600/load.vec.ll index da1149a..b3d6349 100644 --- a/llvm/test/CodeGen/R600/load.vec.ll +++ b/llvm/test/CodeGen/R600/load.vec.ll @@ -3,8 +3,8 @@ ; load a v2i32 value from the global address space. 
; EG-CHECK: @load_v2i32 -; EG-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4 -; EG-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 +; EG-CHECK-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4 +; EG-CHECK-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 ; SI-CHECK: @load_v2i32 ; SI-CHECK: BUFFER_LOAD_DWORDX2 VGPR{{[0-9]+}} define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { diff --git a/llvm/test/CodeGen/R600/loop-address.ll b/llvm/test/CodeGen/R600/loop-address.ll index 23be327..b46d8e9 100644 --- a/llvm/test/CodeGen/R600/loop-address.ll +++ b/llvm/test/CodeGen/R600/loop-address.ll @@ -1,12 +1,9 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -;CHECK: TEX ;CHECK: ALU_PUSH -;CHECK: JUMP @15 -;CHECK: TEX -;CHECK: LOOP_START_DX10 @14 -;CHECK: LOOP_BREAK @13 -;CHECK: POP @15 +;CHECK: LOOP_START_DX10 @11 +;CHECK: LOOP_BREAK @10 +;CHECK: POP @10 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" target triple = "r600--" diff --git a/llvm/test/CodeGen/R600/rotr.ll b/llvm/test/CodeGen/R600/rotr.ll index 960d30d..5c4c4e9 100644 --- a/llvm/test/CodeGen/R600/rotr.ll +++ b/llvm/test/CodeGen/R600/rotr.ll @@ -19,7 +19,7 @@ entry: ; R600-CHECK: @rotl ; R600-CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x ; R600-CHECK-NEXT: 32 -; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PV.[XYZW]}} +; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}} ; SI-CHECK: @rotl ; SI-CHECK: V_SUB_I32_e64 [[DST:VGPR[0-9]+]], 32, {{[SV]GPR[0-9]+}} diff --git a/llvm/test/CodeGen/R600/selectcc-opt.ll b/llvm/test/CodeGen/R600/selectcc-opt.ll index 7f568fc..7e2d559 100644 --- a/llvm/test/CodeGen/R600/selectcc-opt.ll +++ b/llvm/test/CodeGen/R600/selectcc-opt.ll @@ -29,7 +29,6 @@ ENDIF: ; for the icmp 
instruction ; CHECK: @test_b -; CHECK: VTX_READ ; CHECK: SET{{[GTEQN]+}}_DX10 ; CHECK-NEXT: PRED_ ; CHECK-NEXT: ALU clause starting diff --git a/llvm/test/CodeGen/R600/set-dx10.ll b/llvm/test/CodeGen/R600/set-dx10.ll index eb6e9d2..291a7bd 100644 --- a/llvm/test/CodeGen/R600/set-dx10.ll +++ b/llvm/test/CodeGen/R600/set-dx10.ll @@ -5,7 +5,7 @@ ; SET*DX10 instructions. ; CHECK: @fcmp_une_select_fptosi -; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -18,7 +18,7 @@ entry: } ; CHECK: @fcmp_une_select_i32 -; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -29,7 +29,7 @@ entry: } ; CHECK: @fcmp_ueq_select_fptosi -; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -42,7 +42,7 @@ entry: } ; CHECK: @fcmp_ueq_select_i32 -; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -53,7 +53,7 @@ entry: } ; CHECK: @fcmp_ugt_select_fptosi -; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -66,7 +66,7 @@ entry: } ; CHECK: @fcmp_ugt_select_i32 -; CHECK: 
SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -77,7 +77,7 @@ entry: } ; CHECK: @fcmp_uge_select_fptosi -; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -90,7 +90,7 @@ entry: } ; CHECK: @fcmp_uge_select_i32 -; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, +; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -101,7 +101,7 @@ entry: } ; CHECK: @fcmp_ule_select_fptosi -; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, +; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -114,7 +114,7 @@ entry: } ; CHECK: @fcmp_ule_select_i32 -; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, +; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -125,7 +125,7 @@ entry: } ; CHECK: @fcmp_ult_select_fptosi -; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, +; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -138,7 +138,7 @@ entry: } ; CHECK: @fcmp_ult_select_i32 -; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, +; CHECK: SETGT_DX10 * 
T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) { entry: diff --git a/llvm/test/CodeGen/R600/short-args.ll b/llvm/test/CodeGen/R600/short-args.ll index 8f4dc96..69a8412 100644 --- a/llvm/test/CodeGen/R600/short-args.ll +++ b/llvm/test/CodeGen/R600/short-args.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s ; CHECK: @i8_arg -; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} +; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { entry: @@ -12,7 +12,7 @@ entry: } ; CHECK: @i8_zext_arg -; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} +; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { entry: @@ -22,7 +22,7 @@ entry: } ; CHECK: @i8_sext_arg -; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} +; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { entry: %0 = sext i8 %in to i32 @@ -31,7 +31,7 @@ entry: } ; CHECK: @i16_arg -; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} +; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { entry: @@ -41,7 +41,7 @@ entry: } ; CHECK: @i16_zext_arg -; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} +; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { entry: @@ -51,7 +51,7 @@ entry: } ; CHECK: @i16_sext_arg -; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} +; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { entry: diff --git a/llvm/test/CodeGen/R600/unsupported-cc.ll b/llvm/test/CodeGen/R600/unsupported-cc.ll index b311f4c..cf29833 100644 --- 
a/llvm/test/CodeGen/R600/unsupported-cc.ll +++ b/llvm/test/CodeGen/R600/unsupported-cc.ll @@ -3,7 +3,7 @@ ; These tests are for condition codes that are not supported by the hardware ; CHECK: @slt -; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK-NEXT: 5(7.006492e-45) define void @slt(i32 addrspace(1)* %out, i32 %in) { entry: @@ -14,7 +14,7 @@ entry: } ; CHECK: @ult_i32 -; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK-NEXT: 5(7.006492e-45) define void @ult_i32(i32 addrspace(1)* %out, i32 %in) { entry: @@ -25,7 +25,7 @@ entry: } ; CHECK: @ult_float -; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK-NEXT: 1084227584(5.000000e+00) define void @ult_float(float addrspace(1)* %out, float %in) { entry: @@ -36,7 +36,7 @@ entry: } ; CHECK: @olt -; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ;CHECK-NEXT: 1084227584(5.000000e+00) define void @olt(float addrspace(1)* %out, float %in) { entry: @@ -47,7 +47,7 @@ entry: } ; CHECK: @sle -; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK-NEXT: 6(8.407791e-45) define void @sle(i32 addrspace(1)* %out, i32 %in) { entry: @@ -58,7 +58,7 @@ entry: } ; CHECK: @ule_i32 -; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK-NEXT: 6(8.407791e-45) define void @ule_i32(i32 addrspace(1)* %out, i32 %in) { entry: @@ -69,7 +69,7 @@ entry: } ; CHECK: @ule_float -; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK-NEXT: 
1084227584(5.000000e+00) define void @ule_float(float addrspace(1)* %out, float %in) { entry: @@ -80,7 +80,7 @@ entry: } ; CHECK: @ole -; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, +; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK-NEXT:1084227584(5.000000e+00) define void @ole(float addrspace(1)* %out, float %in) { entry: diff --git a/llvm/test/CodeGen/R600/vtx-schedule.ll b/llvm/test/CodeGen/R600/vtx-schedule.ll index a0c79e3..97d37ed 100644 --- a/llvm/test/CodeGen/R600/vtx-schedule.ll +++ b/llvm/test/CodeGen/R600/vtx-schedule.ll @@ -6,17 +6,13 @@ ; CHECK: @test ; CHECK: Fetch clause -; CHECK_VTX_READ_32 [[IN0:T[0-9]+\.X]], [[IN0]], 40 -; CHECK_VTX_READ_32 [[IN1:T[0-9]+\.X]], [[IN1]], 44 -; CHECK: Fetch clause ; CHECK_VTX_READ_32 [[IN0:T[0-9]+\.X]], [[IN0]], 0 +; CHECK: Fetch clause ; CHECK_VTX_READ_32 [[IN1:T[0-9]+\.X]], [[IN1]], 0 -define void @test(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in0, i32 addrspace(1)* nocapture %in1) { +define void @test(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* addrspace(1)* nocapture %in0) { entry: - %0 = load i32 addrspace(1)* %in0, align 4 - %1 = load i32 addrspace(1)* %in1, align 4 - %cmp.i = icmp slt i32 %0, %1 - %cond.i = select i1 %cmp.i, i32 %0, i32 %1 - store i32 %cond.i, i32 addrspace(1)* %out, align 4 + %0 = load i32 addrspace(1)* addrspace(1)* %in0 + %1 = load i32 addrspace(1)* %0 + store i32 %1, i32 addrspace(1)* %out ret void } diff --git a/llvm/test/CodeGen/R600/work-item-intrinsics.ll b/llvm/test/CodeGen/R600/work-item-intrinsics.ll index 46e3e54..7998983 100644 --- a/llvm/test/CodeGen/R600/work-item-intrinsics.ll +++ b/llvm/test/CodeGen/R600/work-item-intrinsics.ll @@ -3,7 +3,7 @@ ; R600-CHECK: @ngroups_x ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 0 +; R600-CHECK: MOV * [[VAL]], KC0[0].X ; SI-CHECK: @ngroups_x ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 0 ; SI-CHECK: 
V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -17,7 +17,7 @@ entry: ; R600-CHECK: @ngroups_y ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 4 +; R600-CHECK: MOV * [[VAL]], KC0[0].Y ; SI-CHECK: @ngroups_y ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 1 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -31,7 +31,7 @@ entry: ; R600-CHECK: @ngroups_z ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 8 +; R600-CHECK: MOV * [[VAL]], KC0[0].Z ; SI-CHECK: @ngroups_z ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 2 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -45,7 +45,7 @@ entry: ; R600-CHECK: @global_size_x ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 12 +; R600-CHECK: MOV * [[VAL]], KC0[0].W ; SI-CHECK: @global_size_x ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 3 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -59,7 +59,7 @@ entry: ; R600-CHECK: @global_size_y ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 16 +; R600-CHECK: MOV * [[VAL]], KC0[1].X ; SI-CHECK: @global_size_y ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 4 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -73,7 +73,7 @@ entry: ; R600-CHECK: @global_size_z ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 20 +; R600-CHECK: MOV * [[VAL]], KC0[1].Y ; SI-CHECK: @global_size_z ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 5 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -87,7 +87,7 @@ entry: ; R600-CHECK: @local_size_x ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 24 +; R600-CHECK: MOV * [[VAL]], KC0[1].Z ; SI-CHECK: @local_size_x ; SI-CHECK: S_LOAD_DWORD 
[[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 6 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -101,7 +101,7 @@ entry: ; R600-CHECK: @local_size_y ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 28 +; R600-CHECK: MOV * [[VAL]], KC0[1].W ; SI-CHECK: @local_size_y ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 7 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] @@ -115,7 +115,7 @@ entry: ; R600-CHECK: @local_size_z ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] -; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 32 +; R600-CHECK: MOV * [[VAL]], KC0[2].X ; SI-CHECK: @local_size_z ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 8 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]