From 4863477e22e02af046915ca2a33dbecfd0ed34b4 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard@amd.com>
Date: Fri, 25 May 2012 12:18:14 -0400
Subject: [PATCH] radeon/llvm: Use tablegen pattern to lower bitconvert

---
 src/gallium/drivers/radeon/AMDGPUInstructions.td   |   6 +
 src/gallium/drivers/radeon/AMDILISelLowering.cpp   | 285 ---------------------
 src/gallium/drivers/radeon/R600Instructions.td     |   5 +
 .../drivers/radeon/R600LowerInstructions.cpp       |   9 -
 4 files changed, 11 insertions(+), 294 deletions(-)
diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td
index a004b9c..f4abbae 100644
--- a/src/gallium/drivers/radeon/AMDGPUInstructions.td
+++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td
@@ -115,6 +115,12 @@ class Insert_Element <ValueType elem_type, ValueType vec_type,
   (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
 >;
 
+// bitconvert pattern: select (bitconvert st -> dt) to $src0 itself, retyped as dt
+class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
+  (dt (bitconvert (st rc:$src0))),
+  (dt rc:$src0)
+>;
+
 include "R600Instructions.td"
 
 include "SIInstrInfo.td"
diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
index 96d7e51..92cf9d6 100644
--- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp
+++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
@@ -640,7 +640,6 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const
     setOperationAction(ISD::SREM, VT, Expand);
     setOperationAction(ISD::UINT_TO_FP, VT, Custom);
     setOperationAction(ISD::FP_TO_UINT, VT, Custom);
-    setOperationAction(ISD::BITCAST, VT, Custom);
     setOperationAction(ISD::GlobalAddress, VT, Custom);
     setOperationAction(ISD::JumpTable, VT, Custom);
     setOperationAction(ISD::ConstantPool, VT, Custom);
@@ -1513,7 +1512,6 @@ AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
       LOWER(SELECT);
       LOWER(SETCC);
       LOWER(SIGN_EXTEND_INREG);
-      LOWER(BITCAST);
       LOWER(DYNAMIC_STACKALLOC);
       LOWER(BRCOND);
       LOWER(BR_CC);
@@ -3232,289 +3230,6 @@ AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
 }
 
 SDValue
-AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
-{
-  SDValue Src = Op.getOperand(0);
-  SDValue Dst = Op;
-  SDValue Res;
-  DebugLoc DL = Op.getDebugLoc();
-  EVT SrcVT = Src.getValueType();
-  EVT DstVT = Dst.getValueType();
-  // Lets bitcast the floating point types to an
-  // equivalent integer type before converting to vectors.
-  if (SrcVT.getScalarType().isFloatingPoint()) {
-    Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
-          SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
-          SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
-        Src);
-    SrcVT = Src.getValueType();
-  }
-  uint32_t ScalarSrcSize = SrcVT.getScalarType()
-    .getSimpleVT().getSizeInBits();
-  uint32_t ScalarDstSize = DstVT.getScalarType()
-    .getSimpleVT().getSizeInBits();
-  uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
-  uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
-  bool isVec = SrcVT.isVector();
-  if (DstVT.getScalarType().isInteger() &&
-      (SrcVT.getScalarType().isInteger()
-       || SrcVT.getScalarType().isFloatingPoint())) {
-    if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
-        || (ScalarSrcSize == 64
-          && DstNumEle == 4
-          && ScalarDstSize == 16)) {
-      // This is the problematic case when bitcasting i64 <-> <4 x i16>
-      // This approach is a little different as we cannot generate a
-      // <4 x i64> vector
-      // as that is illegal in our backend and we are already past
-      // the DAG legalizer.
-      // So, in this case, we will do the following conversion.
-      // Case 1:
-      // %dst = <4 x i16> %src bitconvert i64 ==>
-      // %tmp = <4 x i16> %src convert <4 x i32>
-      // %tmp = <4 x i32> %tmp and 0xFFFF
-      // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
-      // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
-      // %dst = <2 x i32> %tmp bitcast i64
-      // case 2:
-      // %dst = i64 %src bitconvert <4 x i16> ==>
-      // %tmp = i64 %src bitcast <2 x i32>
-      // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
-      // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
-      // %tmp = <4 x i32> %tmp and 0xFFFF
-      // %dst = <4 x i16> %tmp bitcast <4 x i32>
-      SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
-          DAG.getConstant(0xFFFF, MVT::i32));
-      SDValue const16 = DAG.getConstant(16, MVT::i32);
-      if (ScalarDstSize == 64) {
-        // case 1
-        Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
-        Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
-        SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
-            Op, DAG.getConstant(0, MVT::i32));
-        SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
-            Op, DAG.getConstant(1, MVT::i32));
-        y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
-        SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
-            Op, DAG.getConstant(2, MVT::i32));
-        SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
-            Op, DAG.getConstant(3, MVT::i32));
-        w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
-        x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
-        y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
-        Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
-        return Res;
-      } else {
-        // case 2
-        SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
-        SDValue lor16
-          = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
-        SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
-        SDValue hir16
-          = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
-        SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
-            MVT::v4i32, lo);
-        SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
-            getPointerTy(), DAG.getConstant(1, MVT::i32));
-        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
-            resVec, lor16, idxVal);
-        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
-            getPointerTy(), DAG.getConstant(2, MVT::i32));
-        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
-            resVec, hi, idxVal);
-        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
-            getPointerTy(), DAG.getConstant(3, MVT::i32));
-        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
-            resVec, hir16, idxVal);
-        resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
-        Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
-        return Res;
-      }
-    } else {
-      // There are four cases we need to worry about for bitcasts
-      // where the size of all
-      // source, intermediates and result is <= 128 bits, unlike
-      // the above case
-      // 1) Sub32bit bitcast 32bitAlign
-      // %dst = <4 x i8> bitcast i32
-      // (also <[2|4] x i16> to <[2|4] x i32>)
-      // 2) 32bitAlign bitcast Sub32bit
-      // %dst = i32 bitcast <4 x i8>
-      // 3) Sub32bit bitcast LargerSub32bit
-      // %dst = <2 x i8> bitcast i16
-      // (also <4 x i8> to <2 x i16>)
-      // 4) Sub32bit bitcast SmallerSub32bit
-      // %dst = i16 bitcast <2 x i8>
-      // (also <2 x i16> to <4 x i8>)
-      // This also only handles types that are powers of two
-      if ((ScalarDstSize & (ScalarDstSize - 1))
-          || (ScalarSrcSize & (ScalarSrcSize - 1))) {
-      } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
-        // case 1:
-        EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
-#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
-        SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
-#else
-        SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
-            DAG.getUNDEF(IntTy.getScalarType()));
-        for (uint32_t x = 0; x < SrcNumEle; ++x) {
-          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
-              getPointerTy(), DAG.getConstant(x, MVT::i32));
-          SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
-              SrcVT.getScalarType(), Src,
-              DAG.getConstant(x, MVT::i32));
-          temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
-          res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
-              res, temp, idx);
-        }
-#endif
-        SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
-            DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
-        SDValue *newEle = new SDValue[SrcNumEle];
-        res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
-        for (uint32_t x = 0; x < SrcNumEle; ++x) {
-          newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
-              IntTy.getScalarType(), res,
-              DAG.getConstant(x, MVT::i32));
-        }
-        uint32_t Ratio = SrcNumEle / DstNumEle;
-        for (uint32_t x = 0; x < SrcNumEle; ++x) {
-          if (x % Ratio) {
-            newEle[x] = DAG.getNode(ISD::SHL, DL,
-                IntTy.getScalarType(), newEle[x],
-                DAG.getConstant(ScalarSrcSize * (x % Ratio),
-                  MVT::i32));
-          }
-        }
-        for (uint32_t x = 0; x < SrcNumEle; x += 2) {
-          newEle[x] = DAG.getNode(ISD::OR, DL,
-              IntTy.getScalarType(), newEle[x], newEle[x + 1]);
-        }
-        if (ScalarSrcSize == 8) {
-          for (uint32_t x = 0; x < SrcNumEle; x += 4) {
-            newEle[x] = DAG.getNode(ISD::OR, DL,
-                IntTy.getScalarType(), newEle[x], newEle[x + 2]);
-          }
-          if (DstNumEle == 1) {
-            Dst = newEle[0];
-          } else {
-            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
-                newEle[0]);
-            for (uint32_t x = 1; x < DstNumEle; ++x) {
-              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
-                  getPointerTy(), DAG.getConstant(x, MVT::i32));
-              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
-                  DstVT, Dst, newEle[x * 4], idx);
-            }
-          }
-        } else {
-          if (DstNumEle == 1) {
-            Dst = newEle[0];
-          } else {
-            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
-                newEle[0]);
-            for (uint32_t x = 1; x < DstNumEle; ++x) {
-              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
-                  getPointerTy(), DAG.getConstant(x, MVT::i32));
-              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
-                  DstVT, Dst, newEle[x * 2], idx);
-            }
-          }
-        }
-        delete [] newEle;
-        return Dst;
-      } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
-        // case 2:
-        EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
-        SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
-            DAG.getUNDEF(IntTy.getScalarType()));
-        uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
-        for (uint32_t x = 0; x < SrcNumEle; ++x) {
-          for (uint32_t y = 0; y < mult; ++y) {
-            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
-                getPointerTy(),
-                DAG.getConstant(x * mult + y, MVT::i32));
-            SDValue t;
-            if (SrcNumEle > 1) {
-              t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
-                  DL, SrcVT.getScalarType(), Src,
-                  DAG.getConstant(x, MVT::i32));
-            } else {
-              t = Src;
-            }
-            if (y != 0) {
-              t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
-                  t, DAG.getConstant(y * ScalarDstSize,
-                    MVT::i32));
-            }
-            vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
-                DL, IntTy, vec, t, idx);
-          }
-        }
-        Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
-        return Dst;
-      } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
-        // case 3:
-        SDValue *numEle = new SDValue[SrcNumEle];
-        for (uint32_t x = 0; x < SrcNumEle; ++x) {
-          numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
-              MVT::i8, Src, DAG.getConstant(x, MVT::i32));
-          numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
-          numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
-              DAG.getConstant(0xFF, MVT::i16));
-        }
-        for (uint32_t x = 1; x < SrcNumEle; x += 2) {
-          numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
-              DAG.getConstant(8, MVT::i16));
-          numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
-              numEle[x-1], numEle[x]);
-        }
-        if (DstNumEle > 1) {
-          // If we are not a scalar i16, the only other case is a
-          // v2i16 since we can't have v8i8 at this point, v4i16
-          // cannot be generated
-          Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
-              numEle[0]);
-          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
-              getPointerTy(), DAG.getConstant(1, MVT::i32));
-          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
-              Dst, numEle[2], idx);
-        } else {
-          Dst = numEle[0];
-        }
-        delete [] numEle;
-        return Dst;
-      } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
-        // case 4:
-        SDValue *numEle = new SDValue[DstNumEle];
-        for (uint32_t x = 0; x < SrcNumEle; ++x) {
-          numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
-              MVT::i16, Src, DAG.getConstant(x, MVT::i32));
-          numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
-              numEle[x * 2], DAG.getConstant(8, MVT::i16));
-        }
-        MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
-        Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
-        for (uint32_t x = 1; x < DstNumEle; ++x) {
-          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
-              getPointerTy(), DAG.getConstant(x, MVT::i32));
-          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
-              Dst, numEle[x], idx);
-        }
-        delete [] numEle;
-        ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
-        Res = DAG.getSExtOrTrunc(Dst, DL, ty);
-        return Res;
-      }
-    }
-  } 
-  Res = DAG.getNode(AMDILISD::BITCONV,
-      Dst.getDebugLoc(),
-      Dst.getValueType(), Src);
-  return Res;
-}
-
-SDValue
 AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
     SelectionDAG &DAG) const
 {
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td
index a2a509e..f038736 100644
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -1164,4 +1164,9 @@ def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 5, sel_y>;
 def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 6, sel_z>;
 def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 7, sel_w>;
 
+// bitconvert patterns: i32 <-> f32 reinterpretation within R600_Reg32
+
+def : BitConvert <i32, f32, R600_Reg32>;
+def : BitConvert <f32, i32, R600_Reg32>;
+
 } // End isR600toCayman Predicate
diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
index 3b96b19..70c9b8b 100644
--- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp
+++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
@@ -164,15 +164,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
           break;
         }
 
-      case AMDIL::IL_ASINT_f32:
-      case AMDIL::IL_ASINT_i32:
-      case AMDIL::IL_ASFLOAT_f32:
-      case AMDIL::IL_ASFLOAT_i32:
-        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::COPY))
-                .addOperand(MI.getOperand(0))
-                .addOperand(MI.getOperand(1));
-        break;
-
       case AMDIL::ILT:
         BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGT_INT))
                 .addOperand(MI.getOperand(0))
-- 
2.7.4