From: Benjamin Segovia <segovia.benjamin@gmail.com>
Date: Mon, 21 May 2012 17:41:37 +0000 (+0000)
Subject: Now booleans are encoded with 1 short instead of a vector of shorts.
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6d1bea7e99b94e8fe1f19fddbd5b1386aeda4d21;p=contrib%2Fbeignet.git

Now booleans are encoded with 1 short instead of a vector of shorts.
---

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 613678a..bbdd595 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -35,7 +35,6 @@ namespace gbe
   ///////////////////////////////////////////////////////////////////////////
   // Various helper functions
   ///////////////////////////////////////////////////////////////////////////
-
   INLINE uint32_t getGenType(ir::Type type) {
     using namespace ir;
     switch (type) {
@@ -67,12 +66,9 @@ namespace gbe
   ///////////////////////////////////////////////////////////////////////////
   // GenContext implementation
   ///////////////////////////////////////////////////////////////////////////
-
   GenContext::GenContext(const ir::Unit &unit, const std::string &name) :
     Context(unit, name)
-  {
-    p = GBE_NEW(GenEmitter, simdWidth, 7); // XXX handle more than gen7
-  }
+  { p = GBE_NEW(GenEmitter, simdWidth, 7); } // XXX handle more than Gen7
 
   GenContext::~GenContext(void) { GBE_DELETE(p); }
 
@@ -92,6 +88,15 @@ namespace gbe
       return GenReg::Qn(genReg, quarter);
   }
 
+  bool GenContext::isScalarOrBool(ir::Register reg) const {
+    // Registers already classified as scalar need no further check
+    if (this->isScalarReg(reg) == true) return true;
+
+    // Otherwise only the boolean family is handled like a scalar
+    const ir::RegisterFamily regFamily = fn.getRegisterFamily(reg);
+    return regFamily == ir::FAMILY_BOOL;
+  }
+
   // Per-lane block IPs are always pre-allocated and used for branches. We just
   // 0xffff as a fake register for them
   static const ir::Register blockIPReg(0xffff);
@@ -102,7 +107,7 @@ namespace gbe
   static const size_t familyScalarSize[] = {2,1,2,4,8};
 
 #define INSERT_REG(SIMD16, SIMD8, SIMD1) \
-  if (this->isScalarReg(reg) == true) \
+  if (this->isScalarOrBool(reg) == true) \
     RA.insert(std::make_pair(reg, GenReg::SIMD1(nr, subnr))); \
   else if (this->simdWidth == 8) \
     RA.insert(std::make_pair(reg, GenReg::SIMD8(nr, subnr))); \
@@ -120,12 +125,12 @@ namespace gbe
       const uint32_t offset = curbeOffset + subOffset;
       const ir::RegisterData data = fn.getRegisterData(reg);
       const ir::RegisterFamily family = data.family;
-      const bool isScalar = this->isScalarReg(reg);
+      const bool isScalar = this->isScalarOrBool(reg);
       const uint32_t typeSize = isScalar ? familyScalarSize[family] : familyVectorSize[family];
       const uint32_t nr = (offset + GEN_REG_SIZE) / GEN_REG_SIZE;
       const uint32_t subnr = ((offset + GEN_REG_SIZE) % GEN_REG_SIZE) / typeSize;
       switch (family) {
-        case FAMILY_BOOL:
+        case FAMILY_BOOL: INSERT_REG(uw1grf, uw1grf, uw1grf); break;
         case FAMILY_WORD: INSERT_REG(uw16grf, uw8grf, uw1grf); break;
         case FAMILY_BYTE: INSERT_REG(ub16grf, ub8grf, ub1grf); break;
         case FAMILY_DWORD: INSERT_REG(f16grf, f8grf, f1grf); break;
@@ -137,7 +142,7 @@ namespace gbe
 #undef INSERT_REG
 
 #define INSERT_REG(SIMD16, SIMD8, SIMD1) \
-  if (this->isScalarReg(reg) == true) { \
+  if (this->isScalarOrBool(reg) == true) { \
     RA.insert(std::make_pair(reg, GenReg::SIMD1(nr, subnr))); \
     grfOffset += typeSize; \
   } else if (simdWidth == 16) { \
@@ -155,16 +160,17 @@ namespace gbe
     if (fn.getArg(reg) != NULL) return grfOffset; // already done
     if (fn.getPushLocation(reg) != NULL) return grfOffset; // already done
     GBE_ASSERT(this->isScalarReg(reg) == false);
+    const bool isScalar = this->isScalarOrBool(reg);
     const RegisterData regData = fn.getRegisterData(reg);
     const RegisterFamily family = regData.family;
-    const uint32_t typeSize = familyVectorSize[family];
+    const uint32_t typeSize = isScalar ? familyScalarSize[family] : familyVectorSize[family];
     const uint32_t regSize = simdWidth*typeSize;
     grfOffset = ALIGN(grfOffset, regSize);
     if (grfOffset + regSize <= GEN_GRF_SIZE) {
       const uint32_t nr = grfOffset / GEN_REG_SIZE;
       const uint32_t subnr = (grfOffset % GEN_REG_SIZE) / typeSize;
       switch (family) {
-        case FAMILY_BOOL:
+        case FAMILY_BOOL: INSERT_REG(uw1grf, uw1grf, uw1grf); break;
         case FAMILY_WORD: INSERT_REG(uw16grf, uw8grf, uw1grf); break;
         case FAMILY_BYTE: INSERT_REG(ub16grf, ub8grf, ub1grf); break;
         case FAMILY_DWORD: INSERT_REG(f16grf, f8grf, f1grf); break;
@@ -306,20 +312,19 @@ namespace gbe
     // register from the boolean vector
     if (insn.isPredicated() == true) {
       const GenReg pred = this->genReg(insn.getPredicateIndex(), TYPE_U16);
+
+      // Load the boolean predicate into the flag register
       p->push();
-        p->curr.noMask = 1;
-        p->curr.execWidth = 1;
         p->curr.predicate = GEN_PREDICATE_NONE;
-        p->MOV(GenReg::flag(0,1), GenReg::flag(0,0));
+        p->curr.execWidth = 1;
+        p->curr.noMask = 1;
+        p->MOV(GenReg::flag(0,1), pred);
       p->pop();
 
-      // Rebuild the flag register by comparing the boolean with 1s
+      // Update the PcIPs
       p->push();
         p->curr.flag = 0;
         p->curr.subFlag = 1;
-        p->CMP(GEN_CONDITIONAL_EQ, pred, GenReg::immuw(1));
-
-        // Update the PcIPs
         p->MOV(ip, GenReg::immuw(uint16_t(dst)));
       p->pop();
 
@@ -372,16 +377,9 @@ namespace gbe
     const BasicBlock &bb = fn.getBlock(src);
     GBE_ASSERT(bb.getNextBlock() != NULL);
 
-    // Inefficient code. If the instruction is predicated, we build the flag
-    // register from the boolean vector
+    // Inefficient code: the predicate is converted from a GRF to a flag
     if (insn.isPredicated() == true) {
       const GenReg pred = this->genReg(insn.getPredicateIndex(), TYPE_U16);
-      p->push();
-        p->curr.noMask = 1;
-        p->curr.execWidth = 1;
-        p->curr.predicate = GEN_PREDICATE_NONE;
-        p->MOV(GenReg::flag(0,1), GenReg::flag(0,0));
-      p->pop();
 
       // Update the PcIPs for all the branches. Just put the IPs of the next
       // block. Next instruction will properly reupdate the IPs of the lanes
@@ -391,9 +389,15 @@ namespace gbe
 
       // Rebuild the flag register by comparing the boolean with 1s
       p->push();
+        p->curr.noMask = 1;
+        p->curr.execWidth = 1;
+        p->curr.predicate = GEN_PREDICATE_NONE;
+        p->MOV(GenReg::flag(0,1), pred);
+      p->pop();
+
+      p->push();
         p->curr.flag = 0;
         p->curr.subFlag = 1;
-        p->CMP(GEN_CONDITIONAL_EQ, pred, GenReg::immuw(1));
 
         // Re-update the PcIPs for the branches that takes the backward jump
         p->MOV(ip, GenReg::immuw(uint16_t(dst)));
@@ -435,6 +439,15 @@ namespace gbe
     GenReg src0 = this->genReg(insn.getSrc(0), type);
     GenReg src1 = this->genReg(insn.getSrc(1), type);
 
+    p->push();
+
+    // Boolean values use scalars
+    if (this->isScalarOrBool(insn.getDst(0)) == true) {
+      p->curr.execWidth = 1;
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+    }
+
     // Output the binary instruction
     switch (opcode) {
       case OP_ADD: p->ADD(dst, src0, src1); break;
@@ -451,16 +464,17 @@ namespace gbe
           this->emitIntMul32x32(insn, dst, src0, src1);
         else
           NOT_IMPLEMENTED;
-        break;
       }
+      break;
       case OP_DIV:
       {
         GBE_ASSERT(type == TYPE_FLOAT);
         p->MATH(dst, GEN_MATH_FUNCTION_FDIV, src0, src1);
-        break;
       }
+      break;
       default: NOT_IMPLEMENTED;
     }
+    p->pop();
   }
 
   void GenContext::emitTernaryInstruction(const ir::TernaryInstruction &insn) {
@@ -500,12 +514,11 @@ namespace gbe
       p->CMP(genCmp, src0, src1);
     p->pop();
 
-    // We emit a very unoptimized code where we store the resulting mask in a
-    // GRF
+    // We emit unoptimized code that stores the resulting mask in a GRF
     p->push();
-      p->curr.flag = 0;
-      p->curr.subFlag = 1;
-      p->SEL(dst, GenReg::uw1grf(127,0), GenReg::immuw(0));
+      p->curr.execWidth = 1;
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->MOV(dst, GenReg::flag(0,1));
     p->pop();
   }
 
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 136d448..b661516 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -66,7 +66,10 @@ namespace gbe
     GenReg genReg(ir::Register reg, ir::Type type = ir::TYPE_FLOAT);
     /*! Compute the second instruction when using SIMD8 with Qn (n in 2,3,4) */
     GenReg genRegQn(ir::Register reg, uint32_t quarter, ir::Type type = ir::TYPE_FLOAT);
-
+    /*! Bool registers are encoded as scalar words, so the Gen backend
+     *  treats them as scalars
+     */
+    bool isScalarOrBool(ir::Register reg) const;
     /*! Emit instruction per family */
     void emitUnaryInstruction(const ir::UnaryInstruction &insn);
     void emitBinaryInstruction(const ir::BinaryInstruction &insn);