From 4863477e22e02af046915ca2a33dbecfd0ed34b4 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 25 May 2012 12:18:14 -0400 Subject: [PATCH] radeon/llvm: Use tablegen pattern to lower bitconvert --- src/gallium/drivers/radeon/AMDGPUInstructions.td | 6 + src/gallium/drivers/radeon/AMDILISelLowering.cpp | 285 --------------------- src/gallium/drivers/radeon/R600Instructions.td | 5 + .../drivers/radeon/R600LowerInstructions.cpp | 9 - 4 files changed, 11 insertions(+), 294 deletions(-) diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td index a004b9c..f4abbae 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstructions.td +++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td @@ -115,6 +115,12 @@ class Insert_Element ; +// bitconvert pattern +class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat < + (dt (bitconvert (st rc:$src0))), + (dt rc:$src0) +>; + include "R600Instructions.td" include "SIInstrInfo.td" diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp index 96d7e51..92cf9d6 100644 --- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp @@ -640,7 +640,6 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UINT_TO_FP, VT, Custom); setOperationAction(ISD::FP_TO_UINT, VT, Custom); - setOperationAction(ISDBITCAST, VT, Custom); setOperationAction(ISD::GlobalAddress, VT, Custom); setOperationAction(ISD::JumpTable, VT, Custom); setOperationAction(ISD::ConstantPool, VT, Custom); @@ -1513,7 +1512,6 @@ AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const LOWER(SELECT); LOWER(SETCC); LOWER(SIGN_EXTEND_INREG); - LOWER(BITCAST); LOWER(DYNAMIC_STACKALLOC); LOWER(BRCOND); LOWER(BR_CC); @@ -3232,289 +3230,6 @@ AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const } SDValue -AMDILTargetLowering::LowerBITCAST(SDValue Op, 
SelectionDAG &DAG) const -{ - SDValue Src = Op.getOperand(0); - SDValue Dst = Op; - SDValue Res; - DebugLoc DL = Op.getDebugLoc(); - EVT SrcVT = Src.getValueType(); - EVT DstVT = Dst.getValueType(); - // Lets bitcast the floating point types to an - // equivalent integer type before converting to vectors. - if (SrcVT.getScalarType().isFloatingPoint()) { - Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType( - SrcVT.getScalarType().getSimpleVT().getSizeInBits(), - SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1), - Src); - SrcVT = Src.getValueType(); - } - uint32_t ScalarSrcSize = SrcVT.getScalarType() - .getSimpleVT().getSizeInBits(); - uint32_t ScalarDstSize = DstVT.getScalarType() - .getSimpleVT().getSizeInBits(); - uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; - uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1; - bool isVec = SrcVT.isVector(); - if (DstVT.getScalarType().isInteger() && - (SrcVT.getScalarType().isInteger() - || SrcVT.getScalarType().isFloatingPoint())) { - if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16) - || (ScalarSrcSize == 64 - && DstNumEle == 4 - && ScalarDstSize == 16)) { - // This is the problematic case when bitcasting i64 <-> <4 x i16> - // This approach is a little different as we cannot generate a - // <4 x i64> vector - // as that is illegal in our backend and we are already past - // the DAG legalizer. - // So, in this case, we will do the following conversion. 
- // Case 1: - // %dst = <4 x i16> %src bitconvert i64 ==> - // %tmp = <4 x i16> %src convert <4 x i32> - // %tmp = <4 x i32> %tmp and 0xFFFF - // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16> - // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw - // %dst = <2 x i32> %tmp bitcast i64 - // case 2: - // %dst = i64 %src bitconvert <4 x i16> ==> - // %tmp = i64 %src bitcast <2 x i32> - // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy - // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16> - // %tmp = <4 x i32> %tmp and 0xFFFF - // %dst = <4 x i16> %tmp bitcast <4 x i32> - SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32, - DAG.getConstant(0xFFFF, MVT::i32)); - SDValue const16 = DAG.getConstant(16, MVT::i32); - if (ScalarDstSize == 64) { - // case 1 - Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32); - Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask); - SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, - Op, DAG.getConstant(0, MVT::i32)); - SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, - Op, DAG.getConstant(1, MVT::i32)); - y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16); - SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, - Op, DAG.getConstant(2, MVT::i32)); - SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, - Op, DAG.getConstant(3, MVT::i32)); - w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16); - x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y); - y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w); - Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y); - return Res; - } else { - // case 2 - SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src); - SDValue lor16 - = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16); - SDValue hi = DAG.getNode((isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src); - SDValue hir16 - = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16); - SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL, - MVT::v4i32, lo); - SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL, - getPointerTy(), DAG.getConstant(1, MVT::i32)); - resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32, - resVec, lor16, idxVal); - idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL, - getPointerTy(), DAG.getConstant(2, MVT::i32)); - resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32, - resVec, hi, idxVal); - idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL, - getPointerTy(), DAG.getConstant(3, MVT::i32)); - resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32, - resVec, hir16, idxVal); - resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask); - Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16); - return Res; - } - } else { - // There are four cases we need to worry about for bitcasts - // where the size of all - // source, intermediates and result is <= 128 bits, unlike - // the above case - // 1) Sub32bit bitcast 32bitAlign - // %dst = <4 x i8> bitcast i32 - // (also <[2|4] x i16> to <[2|4] x i32>) - // 2) 32bitAlign bitcast Sub32bit - // %dst = i32 bitcast <4 x i8> - // 3) Sub32bit bitcast LargerSub32bit - // %dst = <2 x i8> bitcast i16 - // (also <4 x i8> to <2 x i16>) - // 4) Sub32bit bitcast SmallerSub32bit - // %dst = i16 bitcast <2 x i8> - // (also <2 x i16> to <4 x i8>) - // This also only handles types that are powers of two - if ((ScalarDstSize & (ScalarDstSize - 1)) - || (ScalarSrcSize & (ScalarSrcSize - 1))) { - } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) { - // case 1: - EVT IntTy = genIntType(ScalarDstSize, SrcNumEle); -#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors - SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy); -#else - SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy, - DAG.getUNDEF(IntTy.getScalarType())); - for (uint32_t x = 0; x < 
SrcNumEle; ++x) { - SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL, - getPointerTy(), DAG.getConstant(x, MVT::i32)); - SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - SrcVT.getScalarType(), Src, - DAG.getConstant(x, MVT::i32)); - temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType()); - res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy, - res, temp, idx); - } -#endif - SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy, - DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32)); - SDValue *newEle = new SDValue[SrcNumEle]; - res = DAG.getNode(ISD::AND, DL, IntTy, res, mask); - for (uint32_t x = 0; x < SrcNumEle; ++x) { - newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - IntTy.getScalarType(), res, - DAG.getConstant(x, MVT::i32)); - } - uint32_t Ratio = SrcNumEle / DstNumEle; - for (uint32_t x = 0; x < SrcNumEle; ++x) { - if (x % Ratio) { - newEle[x] = DAG.getNode(ISD::SHL, DL, - IntTy.getScalarType(), newEle[x], - DAG.getConstant(ScalarSrcSize * (x % Ratio), - MVT::i32)); - } - } - for (uint32_t x = 0; x < SrcNumEle; x += 2) { - newEle[x] = DAG.getNode(ISD::OR, DL, - IntTy.getScalarType(), newEle[x], newEle[x + 1]); - } - if (ScalarSrcSize == 8) { - for (uint32_t x = 0; x < SrcNumEle; x += 4) { - newEle[x] = DAG.getNode(ISD::OR, DL, - IntTy.getScalarType(), newEle[x], newEle[x + 2]); - } - if (DstNumEle == 1) { - Dst = newEle[0]; - } else { - Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT, - newEle[0]); - for (uint32_t x = 1; x < DstNumEle; ++x) { - SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL, - getPointerTy(), DAG.getConstant(x, MVT::i32)); - Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, - DstVT, Dst, newEle[x * 4], idx); - } - } - } else { - if (DstNumEle == 1) { - Dst = newEle[0]; - } else { - Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT, - newEle[0]); - for (uint32_t x = 1; x < DstNumEle; ++x) { - SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL, - getPointerTy(), DAG.getConstant(x, MVT::i32)); - Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, 
DL, - DstVT, Dst, newEle[x * 2], idx); - } - } - } - delete [] newEle; - return Dst; - } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) { - // case 2: - EVT IntTy = genIntType(ScalarSrcSize, DstNumEle); - SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy, - DAG.getUNDEF(IntTy.getScalarType())); - uint32_t mult = (ScalarDstSize == 8) ? 4 : 2; - for (uint32_t x = 0; x < SrcNumEle; ++x) { - for (uint32_t y = 0; y < mult; ++y) { - SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL, - getPointerTy(), - DAG.getConstant(x * mult + y, MVT::i32)); - SDValue t; - if (SrcNumEle > 1) { - t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, SrcVT.getScalarType(), Src, - DAG.getConstant(x, MVT::i32)); - } else { - t = Src; - } - if (y != 0) { - t = DAG.getNode(ISD::SRL, DL, t.getValueType(), - t, DAG.getConstant(y * ScalarDstSize, - MVT::i32)); - } - vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, - DL, IntTy, vec, t, idx); - } - } - Dst = DAG.getSExtOrTrunc(vec, DL, DstVT); - return Dst; - } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) { - // case 3: - SDValue *numEle = new SDValue[SrcNumEle]; - for (uint32_t x = 0; x < SrcNumEle; ++x) { - numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - MVT::i8, Src, DAG.getConstant(x, MVT::i32)); - numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16); - numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x], - DAG.getConstant(0xFF, MVT::i16)); - } - for (uint32_t x = 1; x < SrcNumEle; x += 2) { - numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x], - DAG.getConstant(8, MVT::i16)); - numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16, - numEle[x-1], numEle[x]); - } - if (DstNumEle > 1) { - // If we are not a scalar i16, the only other case is a - // v2i16 since we can't have v8i8 at this point, v4i16 - // cannot be generated - Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16, - numEle[0]); - SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL, - getPointerTy(), DAG.getConstant(1, MVT::i32)); - Dst = 
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16, - Dst, numEle[2], idx); - } else { - Dst = numEle[0]; - } - delete [] numEle; - return Dst; - } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) { - // case 4: - SDValue *numEle = new SDValue[DstNumEle]; - for (uint32_t x = 0; x < SrcNumEle; ++x) { - numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - MVT::i16, Src, DAG.getConstant(x, MVT::i32)); - numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16, - numEle[x * 2], DAG.getConstant(8, MVT::i16)); - } - MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16; - Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]); - for (uint32_t x = 1; x < DstNumEle; ++x) { - SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL, - getPointerTy(), DAG.getConstant(x, MVT::i32)); - Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty, - Dst, numEle[x], idx); - } - delete [] numEle; - ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8; - Res = DAG.getSExtOrTrunc(Dst, DL, ty); - return Res; - } - } - } - Res = DAG.getNode(AMDILISD::BITCONV, - Dst.getDebugLoc(), - Dst.getValueType(), Src); - return Res; -} - -SDValue AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index a2a509e..f038736 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -1164,4 +1164,9 @@ def : Insert_Element ; def : Insert_Element ; def : Insert_Element ; +// bitconvert patterns + +def : BitConvert <i32, f32, R600_Reg32>; +def : BitConvert <f32, i32, R600_Reg32>; + } // End isR600toCayman Predicate diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp index 3b96b19..70c9b8b 100644 --- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp +++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp @@ -164,15 +164,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) 
break; } - case AMDIL::IL_ASINT_f32: - case AMDIL::IL_ASINT_i32: - case AMDIL::IL_ASFLOAT_f32: - case AMDIL::IL_ASFLOAT_i32: - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::COPY)) - .addOperand(MI.getOperand(0)) - .addOperand(MI.getOperand(1)); - break; - case AMDIL::ILT: BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGT_INT)) .addOperand(MI.getOperand(0)) -- 2.7.4