From 5a1f1e95f41f811b0a0bc7003b9e12d4b57593ec Mon Sep 17 00:00:00 2001
From: Zhigang Gong <zhigang.gong@intel.com>
Date: Tue, 22 Jul 2014 15:56:08 +0800
Subject: [PATCH] GBE: refactor the immediate class to support vector data
 type.

Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
Reviewed-by: "Song, Ruiling" <ruiling.song@intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |  58 +++++------
 backend/src/ir/function.cpp                |  26 ++---
 backend/src/ir/immediate.hpp               | 162 +++++++++++++++++++++++++----
 backend/src/ir/instruction.cpp             |   2 +-
 backend/src/ir/lowering.cpp                |  12 +--
 backend/src/llvm/llvm_gen_backend.cpp      |  27 ++---
 6 files changed, 206 insertions(+), 81 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 23f99ca..090f897 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1729,7 +1729,7 @@ namespace gbe
     using namespace ir;
     const auto &childInsn = cast<LoadImmInstruction>(insn);
     const auto &imm = childInsn.getImmediate();
-    if(imm.type != TYPE_DOUBLE && imm.type != TYPE_S64 && imm.type != TYPE_U64)
+    if(imm.getType() != TYPE_DOUBLE && imm.getType() != TYPE_S64 && imm.getType() != TYPE_U64)
       return true;
     return false;
   }
@@ -1739,15 +1739,15 @@ namespace gbe
     using namespace ir;
     int sign = negate ? -1 : 1;
     switch (type) {
-      case TYPE_U32:   return GenRegister::immud(imm.data.u32 * sign);
-      case TYPE_S32:   return GenRegister::immd(imm.data.s32 * sign);
-      case TYPE_FLOAT: return GenRegister::immf(imm.data.f32 * sign);
-      case TYPE_U16: return GenRegister::immuw(imm.data.u16 * sign);
-      case TYPE_S16: return  GenRegister::immw(imm.data.s16 * sign);
-      case TYPE_U8:  return GenRegister::immuw(imm.data.u8 * sign);
-      case TYPE_S8:  return GenRegister::immw(imm.data.s8 * sign);
-      case TYPE_DOUBLE: return GenRegister::immdf(imm.data.f64 * sign);
-      case TYPE_BOOL: return GenRegister::immuw(-imm.data.b);  //return 0xffff when true
+      case TYPE_U32:   return GenRegister::immud(imm.getIntegerValue() * sign);
+      case TYPE_S32:   return GenRegister::immd(imm.getIntegerValue() * sign);
+      case TYPE_FLOAT: return GenRegister::immf(imm.getFloatValue() * sign);
+      case TYPE_U16: return GenRegister::immuw(imm.getIntegerValue() * sign);
+      case TYPE_S16: return  GenRegister::immw((int16_t)imm.getIntegerValue() * sign);
+      case TYPE_U8:  return GenRegister::immuw(imm.getIntegerValue() * sign);
+      case TYPE_S8:  return GenRegister::immw((int8_t)imm.getIntegerValue() * sign);
+      case TYPE_DOUBLE: return GenRegister::immdf(imm.getDoubleValue() * sign);
+      case TYPE_BOOL: return GenRegister::immuw(-imm.getIntegerValue());  //return 0xffff when true
       default: NOT_SUPPORTED; return GenRegister::immuw(0);
     }
   }
@@ -2529,9 +2529,9 @@ namespace gbe
         if (src0DAG->insn.getOpcode() == OP_LOADI) {
           const auto &loadimm = cast<LoadImmInstruction>(src0DAG->insn);
           const Immediate imm = loadimm.getImmediate();
-          const Type type = imm.type;
+          const Type type = imm.getType();
           GBE_ASSERT(type == TYPE_U32 || type == TYPE_S32);
-          if (type == TYPE_U32 && imm.data.u32 <= 0xffff) {
+          if (type == TYPE_U32 && imm.getIntegerValue() <= 0xffff) {
             sel.push();
               if (sel.isScalarReg(insn.getDst(0)) == true) {
                 sel.curr.execWidth = 1;
@@ -2541,13 +2541,13 @@ namespace gbe
 
               sel.MUL(sel.selReg(dst, type),
                       sel.selReg(src1, type),
-                      GenRegister::immuw(imm.data.u32));
+                      GenRegister::immuw(imm.getIntegerValue()));
             sel.pop();
             if (dag.child[childID ^ 1] != NULL)
               dag.child[childID ^ 1]->isRoot = 1;
             return true;
           }
-          if (type == TYPE_S32 && (imm.data.s32 >= -32768 && imm.data.s32 <= 32767)) {
+          if (type == TYPE_S32 && (imm.getIntegerValue() >= -32768 && imm.getIntegerValue() <= 32767)) {
             sel.push();
               if (sel.isScalarReg(insn.getDst(0)) == true) {
                 sel.curr.execWidth = 1;
@@ -2557,7 +2557,7 @@ namespace gbe
 
               sel.MUL(sel.selReg(dst, type),
                       sel.selReg(src1, type),
-                      GenRegister::immw(imm.data.s32));
+                      GenRegister::immw(imm.getIntegerValue()));
             sel.pop();
             if (dag.child[childID ^ 1] != NULL)
               dag.child[childID ^ 1]->isRoot = 1;
@@ -2647,21 +2647,21 @@ namespace gbe
             sel.curr.physicalFlag = 0;
             sel.curr.flagIndex = (uint16_t) insn.getDst(0);
           }
-          sel.MOV(dst, imm.data.b ? GenRegister::immuw(0xffff) : GenRegister::immuw(0));
+          sel.MOV(dst, imm.getIntegerValue() ? GenRegister::immuw(0xffff) : GenRegister::immuw(0));
         break;
         case TYPE_U32:
         case TYPE_S32:
         case TYPE_FLOAT:
           sel.MOV(GenRegister::retype(dst, GEN_TYPE_F),
-                  GenRegister::immf(imm.data.f32));
+                  GenRegister::immf(imm.asFloatValue()));
         break;
-        case TYPE_U16: sel.MOV(dst, GenRegister::immuw(imm.data.u16)); break;
-        case TYPE_S16: sel.MOV(dst, GenRegister::immw(imm.data.s16)); break;
-        case TYPE_U8:  sel.MOV(dst, GenRegister::immuw(imm.data.u8)); break;
-        case TYPE_S8:  sel.MOV(dst, GenRegister::immw(imm.data.s8)); break;
-        case TYPE_DOUBLE: sel.LOAD_DF_IMM(dst, GenRegister::immdf(imm.data.f64), sel.selReg(sel.reg(FAMILY_QWORD))); break;
-        case TYPE_S64: sel.LOAD_INT64_IMM(dst, GenRegister::immint64(imm.data.s64)); break;
-        case TYPE_U64: sel.LOAD_INT64_IMM(dst, GenRegister::immint64(imm.data.u64)); break;
+        case TYPE_U16: sel.MOV(dst, GenRegister::immuw(imm.getIntegerValue())); break;
+        case TYPE_S16: sel.MOV(dst, GenRegister::immw(imm.getIntegerValue())); break;
+        case TYPE_U8:  sel.MOV(dst, GenRegister::immuw(imm.getIntegerValue())); break;
+        case TYPE_S8:  sel.MOV(dst, GenRegister::immw(imm.getIntegerValue())); break;
+        case TYPE_DOUBLE: sel.LOAD_DF_IMM(dst, GenRegister::immdf(imm.getDoubleValue()), sel.selReg(sel.reg(FAMILY_QWORD))); break;
+        case TYPE_S64: sel.LOAD_INT64_IMM(dst, GenRegister::immint64(imm.getIntegerValue())); break;
+        case TYPE_U64: sel.LOAD_INT64_IMM(dst, GenRegister::immint64(imm.getIntegerValue())); break;
         default: NOT_SUPPORTED;
       }
       sel.pop();
@@ -3296,13 +3296,13 @@ namespace gbe
         const auto imm = immInsn.getImmediate();
         const Type immType = immInsn.getType();
         if (immType == TYPE_S64 &&
-          imm.data.s64 <= INT_MAX &&
-          imm.data.s64 >= INT_MIN) {
-          src = GenRegister::immd((int32_t)imm.data.s64);
+          imm.getIntegerValue() <= INT_MAX &&
+          imm.getIntegerValue() >= INT_MIN) {
+          src = GenRegister::immd((int32_t)imm.getIntegerValue());
           return true;
         } else if (immType == TYPE_U64 &&
-                   imm.data.u64 <= UINT_MAX) {
-          src = GenRegister::immud((uint32_t)imm.data.s64);
+                   (uint64_t)imm.getIntegerValue() <= UINT_MAX) { // unsigned compare: getIntegerValue() is int64_t, so values >= 2^63 would look negative and wrongly pass
+          src = GenRegister::immud((uint32_t)imm.getIntegerValue());
           return true;
         }
       } else if (dag->insn.getOpcode() == OP_CVT) {
diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp
index 6c6e576..798e5ee 100644
--- a/backend/src/ir/function.cpp
+++ b/backend/src/ir/function.cpp
@@ -146,19 +146,19 @@ namespace ir {
   void Function::outImmediate(std::ostream &out, ImmediateIndex index) const {
     GBE_ASSERT(index < immediates.size());
     const Immediate imm = immediates[index];
-    switch (imm.type) {
-      case TYPE_BOOL: out << !!imm.data.u8; break;
-      case TYPE_S8: out << imm.data.s8; break;
-      case TYPE_U8: out << imm.data.u8; break;
-      case TYPE_S16: out << imm.data.s16; break;
-      case TYPE_U16: out << imm.data.u16; break;
-      case TYPE_S32: out << imm.data.s32; break;
-      case TYPE_U32: out << imm.data.u32; break;
-      case TYPE_S64: out << imm.data.s64; break;
-      case TYPE_U64: out << imm.data.u64; break;
-      case TYPE_HALF: out << "half(" << imm.data.u16 << ")"; break;
-      case TYPE_FLOAT: out << imm.data.f32; break;
-      case TYPE_DOUBLE: out << imm.data.f64; break;
+    switch (imm.getType()) {
+      case TYPE_BOOL: out << !!imm.getIntegerValue(); break;
+      case TYPE_S8:
+      case TYPE_U8:
+      case TYPE_S16:
+      case TYPE_U16:
+      case TYPE_S32:
+      case TYPE_U32:
+      case TYPE_S64: out << imm.getIntegerValue(); break;
+      case TYPE_U64: out << (uint64_t)imm.getIntegerValue(); break;
+      case TYPE_HALF: out << "half(" << imm.getIntegerValue() << ")"; break;
+      case TYPE_FLOAT: out << imm.getFloatValue(); break;
+      case TYPE_DOUBLE: out << imm.getDoubleValue(); break;
     }
   }
 
diff --git a/backend/src/ir/immediate.hpp b/backend/src/ir/immediate.hpp
index 67dd03f..1902fde 100644
--- a/backend/src/ir/immediate.hpp
+++ b/backend/src/ir/immediate.hpp
@@ -25,6 +25,7 @@
 #ifndef __GBE_IR_IMMEDIATE_HPP__
 #define __GBE_IR_IMMEDIATE_HPP__
 
+#include <string.h>
 #include "ir/type.hpp"
 #include "sys/platform.hpp"
 
@@ -36,12 +37,38 @@ namespace ir {
   {
   public:
     INLINE Immediate(void) {}
-#define DECL_CONSTRUCTOR(TYPE, FIELD, IR_TYPE)  \
-    Immediate(TYPE FIELD) {                     \
-      this->type = IR_TYPE;                     \
-      this->data.u64 = 0llu;                    \
-      this->data.FIELD = FIELD;                 \
+
+    Type getType(void) const {
+      return type;
+    }
+
+    uint32_t getTypeSize(void) const { // size in bytes of one element of this immediate
+      switch(type) {
+        default: GBE_ASSERT(0 && "Invalid immediate type.\n"); // if the assert returns, falls through to size 1
+        case TYPE_BOOL:
+        case TYPE_S8:
+        case TYPE_U8:   return 1;
+        case TYPE_HALF:
+        case TYPE_S16:
+        case TYPE_U16:  return 2;
+        case TYPE_FLOAT:
+        case TYPE_S32:
+        case TYPE_U32:  return 4;
+        case TYPE_DOUBLE:
+        case TYPE_S64:
+        case TYPE_U64:  return 8;
+      }
     }
+
+#define DECL_CONSTRUCTOR(TYPE, FIELD, IR_TYPE)                  \
+    Immediate(TYPE FIELD) {                                     \
+      this->type = IR_TYPE;                                     \
+      this->elemNum = 1;                                        \
+      this->data.p = &defaultData;                              \
+      defaultData = 0ull;                                       \
+      *this->data.FIELD = FIELD;                                \
+    }
+
     DECL_CONSTRUCTOR(bool, b, TYPE_BOOL)
     DECL_CONSTRUCTOR(int8_t, s8, TYPE_S8)
     DECL_CONSTRUCTOR(uint8_t, u8, TYPE_U8)
@@ -54,28 +81,125 @@ namespace ir {
     DECL_CONSTRUCTOR(float, f32, TYPE_FLOAT)
     DECL_CONSTRUCTOR(double, f64, TYPE_DOUBLE)
 #undef DECL_CONSTRUCTOR
+
+#define DECL_CONSTRUCTOR(TYPE, FIELD, IR_TYPE, ELEMNUM)         \
+    Immediate(TYPE *FIELD, uint32_t ELEMNUM) {                  \
+      this->type = IR_TYPE;                                     \
+      this->elemNum = ELEMNUM;                                  \
+      if (ELEMNUM * getTypeSize() > sizeof(defaultData))        \
+        this->data.p = malloc(ELEMNUM * getTypeSize());         \
+      else /* payload fits in defaultData */                    \
+        this->data.p = &defaultData;                            \
+      defaultData = 0ull;                                       \
+      memcpy(this->data.FIELD, FIELD, ELEMNUM * getTypeSize()); \
+    }
+    /* One array constructor per element type: copies elemNum values out of the caller's buffer. */
+    DECL_CONSTRUCTOR(bool, b, TYPE_BOOL, elemNum)
+    DECL_CONSTRUCTOR(int8_t, s8, TYPE_S8, elemNum)
+    DECL_CONSTRUCTOR(uint8_t, u8, TYPE_U8, elemNum)
+    DECL_CONSTRUCTOR(int16_t, s16, TYPE_S16, elemNum)
+    DECL_CONSTRUCTOR(uint16_t, u16, TYPE_U16, elemNum)
+    DECL_CONSTRUCTOR(int32_t, s32, TYPE_S32, elemNum)
+    DECL_CONSTRUCTOR(uint32_t, u32, TYPE_U32, elemNum)
+    DECL_CONSTRUCTOR(int64_t, s64, TYPE_S64, elemNum)
+    DECL_CONSTRUCTOR(uint64_t, u64, TYPE_U64, elemNum)
+    DECL_CONSTRUCTOR(float, f32, TYPE_FLOAT, elemNum)
+    DECL_CONSTRUCTOR(double, f64, TYPE_DOUBLE, elemNum)
+#undef DECL_CONSTRUCTOR
+
+    int64_t getIntegerValue(void) const { // first element, widened to 64 bits according to the immediate's type
+      switch (type) {
+        default: GBE_ASSERT(0 && "Invalid immediate type.\n"); // float/double are not integer-readable
+        case TYPE_BOOL: return *data.b;
+        case TYPE_S8:   return *data.s8;
+        case TYPE_U8:   return *data.u8;
+        case TYPE_S16:  return *data.s16;
+        case TYPE_HALF: // raw 16-bit pattern, which is what outImmediate() prints for a half
+        case TYPE_U16:  return *data.u16;
+        case TYPE_S32:  return *data.s32;
+        case TYPE_U32:  return *data.u32;
+        case TYPE_S64:  return *data.s64;
+        case TYPE_U64:  return *data.u64;
+      }
+    }
+
+    float getFloatValue(void) const { // value of the first element; asserts the immediate is TYPE_FLOAT
+      GBE_ASSERT(type == TYPE_FLOAT);
+      return *data.f32;
+    }
+
+    float asFloatValue(void) const { // storage read through the float pointer, with no numeric conversion
+      GBE_ASSERT(type == TYPE_FLOAT || type == TYPE_U32 || type == TYPE_S32); // only these three types are accepted
+      return *data.f32;
+    }
+
+    int64_t asIntegerValue(void) const { // storage read as a single int64_t, whatever the immediate's type
+      GBE_ASSERT(elemNum == 1); // single-element immediates only
+      return *data.s64;
+    }
+
+    double getDoubleValue(void) const { // value of the first element; asserts the immediate is TYPE_DOUBLE
+      GBE_ASSERT(type == TYPE_DOUBLE);
+      return *data.f64;
+    }
+
+    /*! Deep copy. Start on the inline storage so operator= has nothing to free */
+    Immediate(const Immediate & other) {
+      this->data.p = &this->defaultData;
+      *this = other;
+    }
+
+    /*! Deep-copy assignment; releases a heap buffer this object may already own */
+    Immediate & operator= (const Immediate & other) {
+      if (this == &other) return *this;
+      if (this->data.p != &this->defaultData)
+        free(this->data.p);
+      this->type = other.type;
+      this->elemNum = other.elemNum;
+      this->defaultData = other.defaultData;
+      this->data.p = &this->defaultData;
+      if (other.data.p != &other.defaultData) {
+        this->data.p = malloc(other.elemNum * other.getTypeSize());
+        memcpy(this->data.p, other.data.p, other.elemNum * other.getTypeSize());
+      }
+      return *this;
+    }
+
+    /*! Frees the heap buffer, if any; the inline storage needs no release */
+    ~Immediate() {
+      if (data.p != &defaultData) free(data.p);
+    }
+
+  private:
     union {
-      bool b;
-      int8_t s8;
-      uint8_t u8;
-      int16_t s16;
-      uint16_t u16;
-      int32_t s32;
-      uint32_t u32;
-      int64_t s64;
-      uint64_t u64;
-      float f32;
-      double f64;
+      bool *b;
+      int8_t *s8;
+      uint8_t *u8;
+      int16_t *s16;
+      uint16_t *u16;
+      int32_t *s32;
+      uint32_t *u32;
+      int64_t *s64;
+      uint64_t *u64;
+      float *f32;
+      double *f64;
+      void *p;
     } data;     //!< Value to store
     Type type;  //!< Type of the value
+    uint32_t elemNum; //!< vector imm data type
+    uint64_t defaultData;
+
     GBE_CLASS(Immediate);
   };
 
   /*! Compare two immediates */
   INLINE bool operator< (const Immediate &imm0, const Immediate &imm1) {
-    if (imm0.type != imm1.type)
-      return uint32_t(imm0.type) < uint32_t(imm1.type);
-    return imm0.data.u64 < imm1.data.u64;
+    if (imm0.getType() != imm1.getType()) // different types: order by the type tag
+      return uint32_t(imm0.getType()) < uint32_t(imm1.getType());
+    else if (imm0.getType() == TYPE_FLOAT || imm0.getType() == TYPE_DOUBLE) // floating point: order by the storage read as int64_t
+      return imm0.asIntegerValue() < imm1.asIntegerValue();
+    else // NOTE(review): getIntegerValue() reads element 0 only, so vector immediates that differ in later elements compare equivalent - confirm callers
+      return imm0.getIntegerValue() < imm1.getIntegerValue();
   }
 
   /*! A value is stored in a per-function vector. This is the index to it */
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 23848d3..aa69148 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -987,7 +987,7 @@ namespace ir {
         whyNot = "Out-of-bound immediate value index";
         return false;
       }
-      const ir::Type immType = fn.getImmediate(immediateIndex).type;
+      const ir::Type immType = fn.getImmediate(immediateIndex).getType();
       if (UNLIKELY(type != immType)) {
         whyNot = "Inconsistant type for the immediate value to load";
         return false;
diff --git a/backend/src/ir/lowering.cpp b/backend/src/ir/lowering.cpp
index 8042711..739e944 100644
--- a/backend/src/ir/lowering.cpp
+++ b/backend/src/ir/lowering.cpp
@@ -120,19 +120,19 @@ namespace ir {
   };
 
   INLINE uint64_t getOffsetFromImm(const Immediate &imm) {
-    switch (imm.type) {
+    switch (imm.getType()) {
       // bit-cast these ones
       case TYPE_DOUBLE:
-      case TYPE_FLOAT:
+      case TYPE_FLOAT: NOT_SUPPORTED; return 0;
       case TYPE_S64:
       case TYPE_U64:
       case TYPE_U32:
       case TYPE_U16:
-      case TYPE_U8: return imm.data.u64;
+      case TYPE_U8:
       // sign extend these ones
-      case TYPE_S32: return int64_t(imm.data.s32);
-      case TYPE_S16: return int64_t(imm.data.s16);
-      case TYPE_S8: return int64_t(imm.data.s8);
+      case TYPE_S32:
+      case TYPE_S16:
+      case TYPE_S8: return imm.getIntegerValue();
       case TYPE_BOOL:
       case TYPE_HALF: NOT_SUPPORTED; return 0;
     }
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 4633b6b..064aa55 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1011,8 +1011,8 @@ namespace gbe
 
     const ir::ImmediateIndex immIndex = this->newImmediate(c, elemID);
     const ir::Immediate imm = ctx.getImmediate(immIndex);
-    const ir::Register reg = ctx.reg(getFamily(imm.type));
-    ctx.LOADI(imm.type, reg, immIndex);
+    const ir::Register reg = ctx.reg(getFamily(imm.getType()));
+    ctx.LOADI(imm.getType(), reg, immIndex);
     return reg;
   }
 
@@ -2010,7 +2010,7 @@ namespace gbe
           const ir::ImmediateIndex index = ctx.newImmediate(CPV);
           const ir::Immediate imm = ctx.getImmediate(index);
           const ir::Register reg = this->getRegister(dstValue);
-          ctx.LOADI(imm.type, reg, index);
+          ctx.LOADI(imm.getType(), reg, index);
         }
       }
       break;
@@ -2535,9 +2535,9 @@ namespace gbe
       // This is not a kernel argument sampler, we need to append it to sampler set,
       // and allocate a sampler slot for it.
       auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
-      GBE_ASSERTM(x.type == ir::TYPE_U16 || x.type == ir::TYPE_S16, "Invalid sampler type");
+      GBE_ASSERTM(x.getType() == ir::TYPE_U16 || x.getType() == ir::TYPE_S16, "Invalid sampler type");
 
-      index = ctx.getFunction().getSamplerSet()->append(x.data.u32, &ctx);
+      index = ctx.getFunction().getSamplerSet()->append(x.getIntegerValue(), &ctx);
     } else {
       const ir::Register samplerReg = this->getRegister(*AI);
       index = ctx.getFunction().getSamplerSet()->append(samplerReg, &ctx);
@@ -2761,8 +2761,8 @@ handle_read_image:
             GBE_ASSERT(AI != AE); Constant *CPV = dyn_cast<Constant>(*AI);
             assert(CPV);
             auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
-            GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32, "Invalid sampler type");
-            samplerOffset = x.data.u32;
+            GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() == ir::TYPE_S32, "Invalid sampler type");
+            samplerOffset = x.getIntegerValue();
 #endif
             const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
             const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3);
@@ -3146,10 +3146,11 @@ handle_write_image:
       Constant *CPV = dyn_cast<Constant>(src);
       GBE_ASSERT(CPV);
       const uint64_t elemNum = processConstant<uint64_t>(CPV, U64CPVExtractFunctor(ctx));
-      ir::Immediate imm = ctx.getImmediate(immIndex);
-      imm.data.u64 = ALIGN(imm.data.u64 * elemNum, 4);
       elementSize *= elemNum;
-      ctx.setImmediate(immIndex, imm);
+      if (ctx.getPointerSize() == ir::POINTER_32_BITS)
+        immIndex = ctx.newImmediate(uint32_t(ALIGN(elementSize, 4)));
+      else
+        immIndex = ctx.newImmediate(uint64_t(ALIGN(elementSize, 4)));
     }
 
     // Now emit the stream of instructions to get the allocated pointer
@@ -3175,10 +3176,10 @@ handle_write_image:
       }
     }
     // Set the destination register properly
-    ctx.MOV(imm.type, dst, stack);
+    ctx.MOV(imm.getType(), dst, stack);
 
-    ctx.LOADI(imm.type, reg, immIndex);
-    ctx.ADD(imm.type, stack, stack, reg);
+    ctx.LOADI(imm.getType(), reg, immIndex);
+    ctx.ADD(imm.getType(), stack, stack, reg);
     ctx.getFunction().pushStackSize(elementSize);
   }
 
-- 
2.7.4