From bdd690dc3667f66cbe87974f18e247cf1e6f9c5f Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Tue, 23 Feb 2016 13:47:24 -0600 Subject: [PATCH] swr: [rasterizer jitter] Cleanup use of types inside of Builder. Also, cached the simd width since we don't have to keep querying the JitManager for it. --- .../drivers/swr/rasterizer/jitter/builder.cpp | 16 ++++-- .../drivers/swr/rasterizer/jitter/builder.h | 6 +++ .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 58 +++++++++++----------- .../drivers/swr/rasterizer/jitter/fetch_jit.cpp | 58 +++++++++++----------- 4 files changed, 75 insertions(+), 63 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp index c15bdf1..757ea3f 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp @@ -38,6 +38,8 @@ using namespace llvm; Builder::Builder(JitManager *pJitMgr) : mpJitMgr(pJitMgr) { + mVWidth = pJitMgr->mVWidth; + mpIRBuilder = &pJitMgr->mBuilder; mVoidTy = Type::getVoidTy(pJitMgr->mContext); @@ -48,14 +50,18 @@ Builder::Builder(JitManager *pJitMgr) mInt8Ty = Type::getInt8Ty(pJitMgr->mContext); mInt16Ty = Type::getInt16Ty(pJitMgr->mContext); mInt32Ty = Type::getInt32Ty(pJitMgr->mContext); + mInt8PtrTy = PointerType::get(mInt8Ty, 0); + mInt16PtrTy = PointerType::get(mInt16Ty, 0); + mInt32PtrTy = PointerType::get(mInt32Ty, 0); mInt64Ty = Type::getInt64Ty(pJitMgr->mContext); mV4FP32Ty = StructType::get(pJitMgr->mContext, std::vector(4, mFP32Ty), false); // vector4 float type (represented as structure) mV4Int32Ty = StructType::get(pJitMgr->mContext, std::vector(4, mInt32Ty), false); // vector4 int type - mSimdInt16Ty = VectorType::get(mInt16Ty, mpJitMgr->mVWidth); - mSimdInt32Ty = VectorType::get(mInt32Ty, mpJitMgr->mVWidth); - mSimdInt64Ty = VectorType::get(mInt64Ty, mpJitMgr->mVWidth); - mSimdFP16Ty = VectorType::get(mFP16Ty, mpJitMgr->mVWidth); - mSimdFP32Ty = VectorType::get(mFP32Ty, mpJitMgr->mVWidth); + mSimdInt16Ty = VectorType::get(mInt16Ty, mVWidth); + mSimdInt32Ty = VectorType::get(mInt32Ty, mVWidth); + mSimdInt64Ty = VectorType::get(mInt64Ty, mVWidth); + mSimdFP16Ty = VectorType::get(mFP16Ty, mVWidth); + mSimdFP32Ty = VectorType::get(mFP32Ty, mVWidth); + mSimdVectorTy = StructType::get(pJitMgr->mContext, std::vector(4, mSimdFP32Ty), false); if (sizeof(uint32_t*) == 4) { diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.h b/src/gallium/drivers/swr/rasterizer/jitter/builder.h index 4921661..239ef2a 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.h @@ -43,6 +43,8 @@ struct Builder JitManager* mpJitMgr; IRBuilder<>* mpIRBuilder; + uint32_t mVWidth; + // Built in types. Type* mVoidTy; Type* mInt1Ty; @@ -54,12 +56,16 @@ struct Builder Type* mFP16Ty; Type* mFP32Ty; Type* mDoubleTy; + Type* mInt8PtrTy; + Type* mInt16PtrTy; + Type* mInt32PtrTy; Type* mSimdFP16Ty; Type* mSimdFP32Ty; Type* mSimdInt16Ty; Type* mSimdInt32Ty; Type* mSimdInt64Ty; Type* mSimdIntPtrTy; + Type* mSimdVectorTy; StructType* mV4FP32Ty; StructType* mV4Int32Ty; diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp index 2ff77bc..7ebaca0 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp @@ -189,32 +189,32 @@ Constant *Builder::PRED(bool pred) Value *Builder::VIMMED1(int i) { - return ConstantVector::getSplat(JM()->mVWidth, cast(C(i))); + return ConstantVector::getSplat(mVWidth, cast(C(i))); } Value *Builder::VIMMED1(uint32_t i) { - return ConstantVector::getSplat(JM()->mVWidth, cast(C(i))); + return ConstantVector::getSplat(mVWidth, cast(C(i))); } Value *Builder::VIMMED1(float i) { - return ConstantVector::getSplat(JM()->mVWidth, cast(C(i))); + return ConstantVector::getSplat(mVWidth, cast(C(i))); } Value *Builder::VIMMED1(bool i) { - return ConstantVector::getSplat(JM()->mVWidth, cast(C(i))); + return ConstantVector::getSplat(mVWidth, cast(C(i))); } Value *Builder::VUNDEF_IPTR() { - return UndefValue::get(VectorType::get(PointerType::get(mInt32Ty, 0),JM()->mVWidth)); + return UndefValue::get(VectorType::get(mInt32PtrTy,mVWidth)); } Value *Builder::VUNDEF_I() { - return UndefValue::get(VectorType::get(mInt32Ty, JM()->mVWidth)); + return UndefValue::get(VectorType::get(mInt32Ty, mVWidth)); } Value *Builder::VUNDEF(Type *ty, uint32_t size) @@ -224,12 +224,12 @@ Value *Builder::VUNDEF(Type *ty, uint32_t size) Value *Builder::VUNDEF_F() { - return UndefValue::get(VectorType::get(mFP32Ty, JM()->mVWidth)); + return UndefValue::get(VectorType::get(mFP32Ty, mVWidth)); } Value *Builder::VUNDEF(Type* t) { - return UndefValue::get(VectorType::get(t, JM()->mVWidth)); + return UndefValue::get(VectorType::get(t, mVWidth)); } #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 6 @@ -247,7 +247,7 @@ Value *Builder::VBROADCAST(Value *src) return src; } - return VECTOR_SPLAT(JM()->mVWidth, src); + return VECTOR_SPLAT(mVWidth, src); } uint32_t Builder::IMMED(Value* v) @@ -342,8 +342,8 @@ Value *Builder::MASKLOADD(Value* src,Value* mask) else { Function *func = Intrinsic::getDeclaration(JM()->mpCurrentModule,Intrinsic::x86_avx_maskload_ps_256); - Value* fMask = BITCAST(mask,VectorType::get(mFP32Ty,JM()->mVWidth)); - vResult = BITCAST(CALL(func,{src,fMask}), VectorType::get(mInt32Ty,JM()->mVWidth)); + Value* fMask = BITCAST(mask,VectorType::get(mFP32Ty,mVWidth)); + vResult = BITCAST(CALL(func,{src,fMask}), VectorType::get(mInt32Ty,mVWidth)); } return vResult; } @@ -575,7 +575,7 @@ Value *Builder::GATHERPS(Value* vSrc, Value* pBase, Value* vIndices, Value* vMas Value *vScaleVec = VBROADCAST(Z_EXT(scale,mInt32Ty)); Value *vOffsets = MUL(vIndices,vScaleVec); Value *mask = MASK(vMask); - for(uint32_t i = 0; i < JM()->mVWidth; ++i) + for(uint32_t i = 0; i < mVWidth; ++i) { // single component byte index Value *offset = VEXTRACT(vOffsets,C(i)); @@ -625,7 +625,7 @@ Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMas Value *vScaleVec = VBROADCAST(Z_EXT(scale, mInt32Ty)); Value *vOffsets = MUL(vIndices, vScaleVec); Value *mask = MASK(vMask); - for(uint32_t i = 0; i < JM()->mVWidth; ++i) + for(uint32_t i = 0; i < mVWidth; ++i) { // single component byte index Value *offset = VEXTRACT(vOffsets, C(i)); @@ -800,7 +800,7 @@ Value *Builder::CVTPH2PS(Value* a) } Value* pResult = UndefValue::get(mSimdFP32Ty); - for (uint32_t i = 0; i < JM()->mVWidth; ++i) + for (uint32_t i = 0; i < mVWidth; ++i) { Value* pSrc = VEXTRACT(a, C(i)); Value* pConv = CALL(pCvtPh2Ps, std::initializer_list{pSrc}); @@ -833,7 +833,7 @@ Value *Builder::CVTPS2PH(Value* a, Value* rounding) } Value* pResult = UndefValue::get(mSimdInt16Ty); - for (uint32_t i = 0; i < JM()->mVWidth; ++i) + for (uint32_t i = 0; i < mVWidth; ++i) { Value* pSrc = VEXTRACT(a, C(i)); Value* pConv = CALL(pCvtPs2Ph, std::initializer_list{pSrc}); @@ -1085,8 +1085,8 @@ void Builder::GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byt void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[2], Value* vGatherOutput[4], bool bPackedOutput) { // cast types - Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), JM()->mVWidth); - Type* v32x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 4); // vwidth is units of 32 bits + Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth); + Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits // input could either be float or int vector; do shuffle work in int vGatherInput[0] = BITCAST(vGatherInput[0], mSimdInt32Ty); @@ -1094,7 +1094,7 @@ void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInp if(bPackedOutput) { - Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), JM()->mVWidth / 4); // vwidth is units of 32 bits + Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits // shuffle mask Value* vConstMask = C({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, @@ -1179,12 +1179,12 @@ void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInp void Builder::Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput) { // cast types - Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), JM()->mVWidth); - Type* v32x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 4 ); // vwidth is units of 32 bits + Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth); + Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4 ); // vwidth is units of 32 bits if(bPackedOutput) { - Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), JM()->mVWidth / 4); // vwidth is units of 32 bits + Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits // shuffle mask Value* vConstMask = C({0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}); @@ -1292,7 +1292,7 @@ void Builder::SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask) Value* vTmpPtr = ALLOCA(pSrcTy); Value *mask = MASK(vMask); - for (uint32_t i = 0; i < JM()->mVWidth; ++i) + for (uint32_t i = 0; i < mVWidth; ++i) { Value *offset = VEXTRACT(vOffsets, C(i)); // byte pointer to component @@ -1415,8 +1415,8 @@ Value *Builder::VEXTRACTI128(Value* a, Constant* imm8) #else bool flag = !imm8->isZeroValue(); SmallVector idx; - for (unsigned i = 0; i < JM()->mVWidth / 2; i++) { - idx.push_back(C(flag ? i + JM()->mVWidth / 2 : i)); + for (unsigned i = 0; i < mVWidth / 2; i++) { + idx.push_back(C(flag ? i + mVWidth / 2 : i)); } return VSHUFFLE(a, VUNDEF_I(), ConstantVector::get(idx)); #endif @@ -1432,17 +1432,17 @@ Value *Builder::VINSERTI128(Value* a, Value* b, Constant* imm8) #else bool flag = !imm8->isZeroValue(); SmallVector idx; - for (unsigned i = 0; i < JM()->mVWidth; i++) { + for (unsigned i = 0; i < mVWidth; i++) { idx.push_back(C(i)); } Value *inter = VSHUFFLE(b, VUNDEF_I(), ConstantVector::get(idx)); SmallVector idx2; - for (unsigned i = 0; i < JM()->mVWidth / 2; i++) { - idx2.push_back(C(flag ? i : i + JM()->mVWidth)); + for (unsigned i = 0; i < mVWidth / 2; i++) { + idx2.push_back(C(flag ? i : i + mVWidth)); } - for (unsigned i = JM()->mVWidth / 2; i < JM()->mVWidth; i++) { - idx2.push_back(C(flag ? i + JM()->mVWidth / 2 : i)); + for (unsigned i = mVWidth / 2; i < mVWidth; i++) { + idx2.push_back(C(flag ? i + mVWidth / 2 : i)); } return VSHUFFLE(a, inter, ConstantVector::get(idx2)); #endif diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp index c5a180e..2ca0130 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp @@ -105,7 +105,7 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState) std::vector vtxInputIndices(2, C(0)); // GEP pVtxOut = GEP(pVtxOut, C(0)); - pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, JM()->mVWidth), 0)); + pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, mVWidth), 0)); // SWR_FETCH_CONTEXT::pStreams Value* streams = LOAD(fetchInfo,{0, SWR_FETCH_CONTEXT_pStreams}); @@ -220,8 +220,8 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* fet SWRL::UncheckedFixedVector vectors; - std::vector pMask(JM()->mVWidth); - for(uint32_t i = 0; i < JM()->mVWidth; ++i) + std::vector pMask(mVWidth); + for(uint32_t i = 0; i < mVWidth; ++i) { pMask[i] = (C(i < 4 ? i : 4)); } @@ -254,7 +254,7 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* fet Value* startVertexOffset = MUL(Z_EXT(startVertex, mInt64Ty), stride); // Load from the stream. - for(uint32_t lane = 0; lane < JM()->mVWidth; ++lane) + for(uint32_t lane = 0; lane < mVWidth; ++lane) { // Get index Value* index = VEXTRACT(vIndices, C(lane)); @@ -380,44 +380,44 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE &fetchState, Value* fet vectors.push_back(wvec); } - std::vector v01Mask(JM()->mVWidth); - std::vector v23Mask(JM()->mVWidth); - std::vector v02Mask(JM()->mVWidth); - std::vector v13Mask(JM()->mVWidth); + std::vector v01Mask(mVWidth); + std::vector v23Mask(mVWidth); + std::vector v02Mask(mVWidth); + std::vector v13Mask(mVWidth); // Concatenate the vectors together. elements[0] = VUNDEF_F(); elements[1] = VUNDEF_F(); elements[2] = VUNDEF_F(); elements[3] = VUNDEF_F(); - for(uint32_t b = 0, num4Wide = JM()->mVWidth / 4; b < num4Wide; ++b) + for(uint32_t b = 0, num4Wide = mVWidth / 4; b < num4Wide; ++b) { v01Mask[4 * b + 0] = C(0 + 4 * b); v01Mask[4 * b + 1] = C(1 + 4 * b); - v01Mask[4 * b + 2] = C(0 + 4 * b + JM()->mVWidth); - v01Mask[4 * b + 3] = C(1 + 4 * b + JM()->mVWidth); + v01Mask[4 * b + 2] = C(0 + 4 * b + mVWidth); + v01Mask[4 * b + 3] = C(1 + 4 * b + mVWidth); v23Mask[4 * b + 0] = C(2 + 4 * b); v23Mask[4 * b + 1] = C(3 + 4 * b); - v23Mask[4 * b + 2] = C(2 + 4 * b + JM()->mVWidth); - v23Mask[4 * b + 3] = C(3 + 4 * b + JM()->mVWidth); + v23Mask[4 * b + 2] = C(2 + 4 * b + mVWidth); + v23Mask[4 * b + 3] = C(3 + 4 * b + mVWidth); v02Mask[4 * b + 0] = C(0 + 4 * b); v02Mask[4 * b + 1] = C(2 + 4 * b); - v02Mask[4 * b + 2] = C(0 + 4 * b + JM()->mVWidth); - v02Mask[4 * b + 3] = C(2 + 4 * b + JM()->mVWidth); + v02Mask[4 * b + 2] = C(0 + 4 * b + mVWidth); + v02Mask[4 * b + 3] = C(2 + 4 * b + mVWidth); v13Mask[4 * b + 0] = C(1 + 4 * b); v13Mask[4 * b + 1] = C(3 + 4 * b); - v13Mask[4 * b + 2] = C(1 + 4 * b + JM()->mVWidth); - v13Mask[4 * b + 3] = C(3 + 4 * b + JM()->mVWidth); + v13Mask[4 * b + 2] = C(1 + 4 * b + mVWidth); + v13Mask[4 * b + 3] = C(3 + 4 * b + mVWidth); - std::vector iMask(JM()->mVWidth); - for(uint32_t i = 0; i < JM()->mVWidth; ++i) + std::vector iMask(mVWidth); + for(uint32_t i = 0; i < mVWidth; ++i) { if(((4 * b) <= i) && (i < (4 * (b + 1)))) { - iMask[i] = C(i % 4 + JM()->mVWidth); + iMask[i] = C(i % 4 + mVWidth); } else { @@ -805,7 +805,7 @@ Value* FetchJit::GetSimdValid8bitIndices(Value* pIndices, Value* pLastIndex) STORE(C((uint8_t)0), pZeroIndex); // Load a SIMD of index pointers - for(int64_t lane = 0; lane < JM()->mVWidth; lane++) + for(int64_t lane = 0; lane < mVWidth; lane++) { // Calculate the address of the requested index Value *pIndex = GEP(pIndices, C(lane)); @@ -840,7 +840,7 @@ Value* FetchJit::GetSimdValid16bitIndices(Value* pIndices, Value* pLastIndex) STORE(C((uint16_t)0), pZeroIndex); // Load a SIMD of index pointers - for(int64_t lane = 0; lane < JM()->mVWidth; lane++) + for(int64_t lane = 0; lane < mVWidth; lane++) { // Calculate the address of the requested index Value *pIndex = GEP(pIndices, C(lane)); @@ -925,13 +925,13 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args) const uint32_t (&swizzle)[4] = std::get<9>(args); // cast types - Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), JM()->mVWidth); - Type* v32x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 4 ); // vwidth is units of 32 bits + Type* vGatherTy = mSimdInt32Ty; + Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4 ); // vwidth is units of 32 bits // have to do extra work for sign extending if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP)){ - Type* v16x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 2); // 8x16bit ints in a 128bit lane - Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), JM()->mVWidth / 4); // vwidth is units of 32 bits + Type* v16x8Ty = VectorType::get(mInt8Ty, mVWidth * 2); // 8x16bit ints in a 128bit lane + Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits // shuffle mask, including any swizzling const char x = (char)swizzle[0]; const char y = (char)swizzle[1]; @@ -1138,8 +1138,8 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args) Value* (&vVertexElements)[4] = std::get<8>(args); // cast types - Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), JM()->mVWidth); - Type* v32x8Ty = VectorType::get(mInt8Ty, JM()->mVWidth * 4); // vwidth is units of 32 bits + Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth); + Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits // have to do extra work for sign extending if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP)|| @@ -1149,7 +1149,7 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args) bool bFP = (extendType == Instruction::CastOps::FPExt) ? true : false; Type* v8x16Ty = VectorType::get(mInt16Ty, 8); // 8x16bit in a 128bit lane - Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), JM()->mVWidth / 4); // vwidth is units of 32 bits + Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits // shuffle mask Value* vConstMask = C({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, -- 2.7.4