GBE: Change 64bit integer storage in register

author Ruiling Song <ruiling.song@intel.com>

Thu, 29 May 2014 02:29:33 +0000 (10:29 +0800)

committer Zhigang Gong <zhigang.gong@intel.com>

Thu, 29 May 2014 06:49:03 +0000 (14:49 +0800)
author Ruiling Song <ruiling.song@intel.com>
Thu, 29 May 2014 02:29:33 +0000 (10:29 +0800)
committer Zhigang Gong <zhigang.gong@intel.com>
Thu, 29 May 2014 06:49:03 +0000 (14:49 +0800)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp

index 0f29468..7a0a806 100644 (file)
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -213,17 +213,7 @@ namespace gbe
        case SEL_OP_LOAD_INT64_IMM: p->LOAD_INT64_IMM(dst, src.value.i64); break;
        case SEL_OP_CONVI64_TO_I:
         {
-        int execWidth = p->curr.execWidth;
-        GenRegister xsrc = src.bottom_half(), xdst = dst;
-        p->push();
-        p->curr.execWidth = 8;
-        for(int i = 0; i < execWidth/4; i ++) {
-          p->curr.chooseNib(i);
-          p->MOV(xdst, xsrc);
-          xdst = GenRegister::suboffset(xdst, 4);
-          xsrc = GenRegister::suboffset(xsrc, 4);
-        }
-        p->pop();
+        p->MOV(dst, src.bottom_half());
          break;
         }
        case SEL_OP_BRC:
@@ -268,28 +258,18 @@ namespace gbe
          p->MOV_DF(dst, src, tmp);
          break;
        case SEL_OP_CONVI_TO_I64: {
-        GenRegister middle;
-        if (src.type == GEN_TYPE_B || src.type == GEN_TYPE_D) {
+        GenRegister middle = src;
+        if(src.type == GEN_TYPE_B || src.type == GEN_TYPE_W) {
            middle = tmp;
-          middle.type = src.is_signed_int() ? GEN_TYPE_D : GEN_TYPE_UD;
+          middle.type = GEN_TYPE_D;
            p->MOV(middle, src);
-        } else {
-          middle = src;
          }
-        int execWidth = p->curr.execWidth;
-        p->push();
-        p->curr.execWidth = 8;
-        for (int nib = 0; nib < execWidth / 4; nib ++) {
-          p->curr.chooseNib(nib);
-          p->MOV(dst.bottom_half(), middle);
-          if(middle.is_signed_int())
-            p->ASR(dst.top_half(), middle, GenRegister::immud(31));
-          else
-            p->MOV(dst.top_half(), GenRegister::immd(0));
-          dst = GenRegister::suboffset(dst, 4);
-          middle = GenRegister::suboffset(middle, 4);
-        }
-        p->pop();
+
+        p->MOV(dst.bottom_half(), middle);
+        if(src.is_signed_int())
+          p->ASR(dst.top_half(this->simdWidth), middle, GenRegister::immud(31));
+        else
+          p->MOV(dst.top_half(this->simdWidth), GenRegister::immud(0));
          break;
        }
        default:
@@ -304,8 +284,10 @@ namespace gbe
      GenRegister tmp = ra->genReg(insn.dst(1));
      switch (insn.opcode) {
        case SEL_OP_I64ADD: {
-        GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD),
-                    y = GenRegister::suboffset(x, p->curr.execWidth);
+        tmp = GenRegister::retype(tmp, GEN_TYPE_UL);
+        GenRegister x = tmp.bottom_half();
+        GenRegister y = tmp.top_half(this->simdWidth);
+
          loadBottomHalf(x, src0);
          loadBottomHalf(y, src1);
          addWithCarry(x, x, y);
@@ -318,8 +300,10 @@ namespace gbe
          break;
        }
        case SEL_OP_I64SUB: {
-        GenRegister x = GenRegister::retype(tmp, GEN_TYPE_UD),
-                    y = GenRegister::suboffset(x, p->curr.execWidth);
+        tmp = GenRegister::retype(tmp, GEN_TYPE_UL);
+        GenRegister x = tmp.bottom_half();
+        GenRegister y = tmp.top_half(this->simdWidth);
+
          loadBottomHalf(x, src0);
          loadBottomHalf(y, src1);
          subWithBorrow(x, x, y);
@@ -400,21 +384,8 @@ namespace gbe
        case SEL_OP_SEL:  p->SEL(dst, src0, src1); break;
        case SEL_OP_SEL_INT64:
          {
-          GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL),
-                      xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL),
-                      xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL);
-          int execWidth = p->curr.execWidth;
-          p->push();
-          p->curr.execWidth = 8;
-          for (int nib = 0; nib < execWidth / 4; nib ++) {
-            p->curr.chooseNib(nib);
-            p->SEL(xdst.bottom_half(), xsrc0.bottom_half(), xsrc1.bottom_half());
-            p->SEL(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half());
-            xdst = GenRegister::suboffset(xdst, 4);
-            xsrc0 = GenRegister::suboffset(xsrc0, 4);
-            xsrc1 = GenRegister::suboffset(xsrc1, 4);
-          }
-          p->pop();
+          p->SEL(dst.bottom_half(), src0.bottom_half(), src1.bottom_half());
+          p->SEL(dst.top_half(this->simdWidth), src0.top_half(this->simdWidth), src1.top_half(this->simdWidth));
          }
          break;
        case SEL_OP_AND:  p->AND(dst, src0, src1, insn.extra.function); break;
@@ -422,59 +393,20 @@ namespace gbe
        case SEL_OP_XOR:  p->XOR(dst, src0, src1, insn.extra.function); break;
        case SEL_OP_I64AND:
          {
-          GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL),
-                      xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL),
-                      xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL);
-          int execWidth = p->curr.execWidth;
-          p->push();
-          p->curr.execWidth = 8;
-          for (int nib = 0; nib < execWidth / 4; nib ++) {
-            p->curr.chooseNib(nib);
-            p->AND(xdst.bottom_half(), xsrc0.bottom_half(), xsrc1.bottom_half());
-            p->AND(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half());
-            xdst = GenRegister::suboffset(xdst, 4),
-            xsrc0 = GenRegister::suboffset(xsrc0, 4),
-            xsrc1 = GenRegister::suboffset(xsrc1, 4);
-          }
-          p->pop();
+          p->AND(dst.bottom_half(), src0.bottom_half(), src1.bottom_half());
+          p->AND(dst.top_half(this->simdWidth), src0.top_half(this->simdWidth), src1.top_half(this->simdWidth));
          }
          break;
        case SEL_OP_I64OR:
          {
-          GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL),
-                      xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL),
-                      xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL);
-          int execWidth = p->curr.execWidth;
-          p->push();
-          p->curr.execWidth = 8;
-          for (int nib = 0; nib < execWidth / 4; nib ++) {
-            p->curr.chooseNib(nib);
-            p->OR(xdst.bottom_half(), xsrc0.bottom_half(), xsrc1.bottom_half());
-            p->OR(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half());
-            xdst = GenRegister::suboffset(xdst, 4),
-            xsrc0 = GenRegister::suboffset(xsrc0, 4),
-            xsrc1 = GenRegister::suboffset(xsrc1, 4);
-          }
-          p->pop();
+          p->OR(dst.bottom_half(), src0.bottom_half(), src1.bottom_half());
+          p->OR(dst.top_half(this->simdWidth), src0.top_half(this->simdWidth), src1.top_half(this->simdWidth));
          }
          break;
        case SEL_OP_I64XOR:
          {
-          GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL),
-                      xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL),
-                      xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL);
-          int execWidth = p->curr.execWidth;
-          p->push();
-          p->curr.execWidth = 8;
-          for (int nib = 0; nib < execWidth / 4; nib ++) {
-            p->curr.chooseNib(nib);
-            p->XOR(xdst.bottom_half(), xsrc0.bottom_half(), xsrc1.bottom_half());
-            p->XOR(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half());
-            xdst = GenRegister::suboffset(xdst, 4),
-            xsrc0 = GenRegister::suboffset(xsrc0, 4),
-            xsrc1 = GenRegister::suboffset(xsrc1, 4);
-          }
-          p->pop();
+          p->XOR(dst.bottom_half(), src0.bottom_half(), src1.bottom_half());
+          p->XOR(dst.top_half(this->simdWidth), src0.top_half(this->simdWidth), src1.top_half(this->simdWidth));
          }
          break;
        case SEL_OP_SHR:  p->SHR(dst, src0, src1); break;
@@ -492,18 +424,8 @@ namespace gbe
            GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL),
                        xsrc0 = GenRegister::retype(src0, GEN_TYPE_UL),
                        xsrc1 = GenRegister::retype(src1, GEN_TYPE_UL);
-          int execWidth = p->curr.execWidth;
-          p->push();
-          p->curr.execWidth = 8;
-          for (int nib = 0; nib < execWidth / 4; nib ++) {
-            p->curr.chooseNib(nib);
-            p->MOV(xdst.top_half(), xsrc0.bottom_half());
-            p->MOV(xdst.bottom_half(), xsrc1.bottom_half());
-            xdst = GenRegister::suboffset(xdst, 4);
-            xsrc0 = GenRegister::suboffset(xsrc0, 4);
-            xsrc1 = GenRegister::suboffset(xsrc1, 4);
-          }
-          p->pop();
+          p->MOV(xdst.top_half(this->simdWidth), xsrc0.bottom_half());
+          p->MOV(xdst.bottom_half(), xsrc1.bottom_half());
          }
          break;
        default: NOT_IMPLEMENTED;
@@ -511,16 +433,10 @@ namespace gbe
    }
  
    void GenContext::collectShifter(GenRegister dest, GenRegister src) {
-    int execWidth = p->curr.execWidth;
      p->push();
-    p->curr.predicate = GEN_PREDICATE_NONE;
-    p->curr.noMask = 1;
-    p->curr.execWidth = 8;
-    for (int nib = 0; nib < execWidth / 4; nib ++) {
-      p->AND(dest, src.bottom_half(), GenRegister::immud(63));
-      dest = GenRegister::suboffset(dest, 4);
-      src = GenRegister::suboffset(src, 4);
-    }
+      p->curr.predicate = GEN_PREDICATE_NONE;
+      p->curr.noMask = 1;
+    p->AND(dest, src.bottom_half(), GenRegister::immud(63));
      p->pop();
    }
  
@@ -1267,73 +1183,19 @@ namespace gbe
    }
  
    void GenContext::loadTopHalf(GenRegister dest, GenRegister src) {
-    int execWidth = p->curr.execWidth;
-    src = src.top_half();
-    p->push();
-    p->curr.predicate = GEN_PREDICATE_NONE;
-    p->curr.noMask = 1;
-    p->curr.execWidth = 8;
-    p->MOV(dest, src);
-    p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 4));
-    if (execWidth == 16) {
-      p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 8));
-      p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 12));
-    }
-    p->pop();
+    p->MOV(dest, src.top_half(this->simdWidth));
    }
  
    void GenContext::storeTopHalf(GenRegister dest, GenRegister src) {
-    int execWidth = p->curr.execWidth;
-    dest = dest.top_half();
-    p->push();
-    p->curr.noMask = 0;
-    p->curr.execWidth = 8;
-    p->MOV(dest, src);
-    p->curr.nibControl = 1;
-    p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 4));
-    if (execWidth == 16) {
-      p->curr.quarterControl = 1;
-      p->curr.nibControl = 0;
-      p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 8));
-      p->curr.nibControl = 1;
-      p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 12));
-    }
-    p->pop();
+    p->MOV(dest.top_half(this->simdWidth), src);
    }
  
    void GenContext::loadBottomHalf(GenRegister dest, GenRegister src) {
-    int execWidth = p->curr.execWidth;
-    src = src.bottom_half();
-    p->push();
-    p->curr.predicate = GEN_PREDICATE_NONE;
-    p->curr.noMask = 1;
-    p->curr.execWidth = 8;
-    p->MOV(dest, src);
-    p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 4));
-    if (execWidth == 16) {
-      p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 8));
-      p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 12));
-    }
-    p->pop();
+    p->MOV(dest, src.bottom_half());
    }
  
    void GenContext::storeBottomHalf(GenRegister dest, GenRegister src) {
-    int execWidth = p->curr.execWidth;
-    dest = dest.bottom_half();
-    p->push();
-    p->curr.execWidth = 8;
-    p->curr.noMask = 0;
-    p->MOV(dest, src);
-    p->curr.nibControl = 1;
-    p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 4));
-    if (execWidth == 16) {
-      p->curr.quarterControl = 1;
-      p->curr.nibControl = 0;
-      p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 8));
-      p->curr.nibControl = 1;
-      p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 12));
-    }
-    p->pop();
+    p->MOV(dest.bottom_half(), src);
    }
  
    void GenContext::addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1) {
@@ -1770,18 +1632,12 @@ namespace gbe
      p->pop();
    }
  
-  //  For SIMD8, we allocate 2*elemNum temporary registers from dst(0), and
-  //  then follow the real destination registers.
-  //  For SIMD16, we allocate elemNum temporary registers from dst(0).
    void GenContext::emitRead64Instruction(const SelectionInstruction &insn) {
      const uint32_t elemNum = insn.extra.elem;
-    const uint32_t tmpRegSize = (p->curr.execWidth == 8) ? elemNum * 2 : elemNum;
-    const GenRegister tempAddr = ra->genReg(insn.dst(tmpRegSize + 1));
-    const GenRegister dst = ra->genReg(insn.dst(tmpRegSize));
-    const GenRegister tmp = ra->genReg(insn.dst(0));
+    const GenRegister dst = ra->genReg(insn.dst(0));
      const GenRegister src = ra->genReg(insn.src(0));
      const uint32_t bti = insn.getbti();
-    p->READ64(dst, tmp, tempAddr, src, bti, elemNum);
+    p->UNTYPED_READ(dst, src, bti, elemNum*2);
    }
  
    void GenContext::emitUntypedReadInstruction(const SelectionInstruction &insn) {
@@ -1792,17 +1648,11 @@ namespace gbe
      p->UNTYPED_READ(dst, src, bti, elemNum);
    }
  
-  //  For SIMD8, we allocate 2*elemNum temporary registers from dst(0), and
-  //  then follow the real destination registers.
-  //  For SIMD16, we allocate elemNum temporary registers from dst(0).
    void GenContext::emitWrite64Instruction(const SelectionInstruction &insn) {
      const GenRegister src = ra->genReg(insn.dst(0));
      const uint32_t elemNum = insn.extra.elem;
-    const GenRegister addr = ra->genReg(insn.src(0)); //tmpRegSize + 1));
-    const GenRegister data = ra->genReg(insn.src(1));
      const uint32_t bti = insn.getbti();
-    p->MOV(src, addr);
-    p->WRITE64(src, data, bti, elemNum, sel->isScalarReg(data.reg()));
+    p->UNTYPED_WRITE(src, bti, elemNum*2);
    }
  
    void GenContext::emitUntypedWriteInstruction(const SelectionInstruction &insn) {
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp

index ed2fd32..abb04e6 100644 (file)
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -221,6 +221,7 @@ namespace gbe
    GenEncoder::GenEncoder(uint32_t simdWidth, uint32_t gen, uint32_t deviceID, int jump_width) :
      stateNum(0), gen(gen), deviceID(deviceID), jump_width(jump_width)
    {
+    this->simdWidth = simdWidth;
      this->curr.execWidth = simdWidth;
      this->curr.quarterControl = GEN_COMPRESSION_Q1;
      this->curr.noMask = 0;
@@ -381,76 +382,6 @@ namespace gbe
      0
    };
  
-  void GenEncoder::READ64(GenRegister dst, GenRegister tmp, GenRegister addr, GenRegister src, uint32_t bti, uint32_t elemNum) {
-    GenRegister dst32 = GenRegister::retype(dst, GEN_TYPE_UD);
-    src = GenRegister::retype(src, GEN_TYPE_UD);
-    addr = GenRegister::retype(addr, GEN_TYPE_UD);
-    tmp = GenRegister::retype(tmp, GEN_TYPE_UD);
-    uint32_t originSimdWidth = curr.execWidth;
-    uint32_t originPredicate = curr.predicate;
-    uint32_t originMask = curr.noMask;
-    push();
-    for ( uint32_t channels = 0, currQuarter = GEN_COMPRESSION_Q1;
-          channels < originSimdWidth; channels += 8, currQuarter++) {
-      curr.predicate = GEN_PREDICATE_NONE;
-      curr.noMask = GEN_MASK_DISABLE;
-      curr.execWidth = 8;
-      /* XXX The following instruction is illegal, but it works as SIMD 1*4 mode
-         which is what we want here. */
-      MOV(GenRegister::h2(addr), GenRegister::suboffset(src, channels));
-      ADD(GenRegister::h2(GenRegister::suboffset(addr, 1)), GenRegister::suboffset(src, channels), GenRegister::immd(4));
-      MOV(GenRegister::h2(GenRegister::suboffset(addr, 8)), GenRegister::suboffset(src, channels + 4));
-      ADD(GenRegister::h2(GenRegister::suboffset(addr, 9)), GenRegister::suboffset(src, channels + 4), GenRegister::immd(4));
-      // Let's use SIMD16 to read all bytes for 8 doubles data at one time.
-      curr.execWidth = 16;
-      this->UNTYPED_READ(tmp, addr, bti, elemNum);
-      if (originSimdWidth == 16)
-        curr.quarterControl = currQuarter;
-      curr.predicate = originPredicate;
-      curr.noMask = originMask;
-      // Back to simd8 for correct predication flag.
-      curr.execWidth = 8;
-      MOV(GenRegister::retype(GenRegister::suboffset(dst32, channels * 2), GEN_TYPE_DF), GenRegister::retype(tmp, GEN_TYPE_DF));
-    }
-    pop();
-  }
-
-  void GenEncoder::WRITE64(GenRegister msg, GenRegister data, uint32_t bti, uint32_t elemNum, bool is_scalar) {
-    GenRegister data32 = GenRegister::retype(data, GEN_TYPE_UD);
-    GenRegister unpacked;
-    msg = GenRegister::retype(msg, GEN_TYPE_UD);
-    int originSimdWidth = curr.execWidth;
-    int originPredicate = curr.predicate;
-    int originMask = curr.noMask;
-    push();
-    for (uint32_t half = 0; half < 2; half++) {
-      curr.predicate = GEN_PREDICATE_NONE;
-      curr.noMask = GEN_MASK_DISABLE;
-      curr.execWidth = 8;
-      if (is_scalar) {
-        unpacked = data32;
-        unpacked.subnr += half * 4;
-      } else
-        unpacked = GenRegister::unpacked_ud(data32.nr, data32.subnr + half);
-      MOV(GenRegister::suboffset(msg, originSimdWidth), unpacked);
-      if (originSimdWidth == 16) {
-        if (is_scalar) {
-          unpacked = data32;
-          unpacked.subnr += half * 4;
-        } else
-          unpacked = GenRegister::unpacked_ud(data32.nr + 2, data32.subnr + half);
-        MOV(GenRegister::suboffset(msg, originSimdWidth + 8), unpacked);
-        curr.execWidth = 16;
-      }
-      if (half == 1)
-        ADD(GenRegister::retype(msg, GEN_TYPE_UD), GenRegister::retype(msg, GEN_TYPE_UD), GenRegister::immd(4));
-      curr.predicate = originPredicate;
-      curr.noMask = originMask;
-      this->UNTYPED_WRITE(msg, bti, elemNum);
-    }
-    pop();
-  }
-
    void GenEncoder::UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum) {
      GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
      assert(elemNum >= 1 || elemNum <= 4);
@@ -683,17 +614,8 @@ namespace gbe
       if (dst.isdf() && src.isdf()) {
         handleDouble(p, opcode, dst, src);
       } else if (dst.isint64() && src.isint64()) { // handle int64
-       int execWidth = p->curr.execWidth;
-       p->push();
-       p->curr.execWidth = 8;
-       for (int nib = 0; nib < execWidth / 4; nib ++) {
-         p->curr.chooseNib(nib);
-         p->MOV(dst.bottom_half(), src.bottom_half());
-         p->MOV(dst.top_half(), src.top_half());
-         dst = GenRegister::suboffset(dst, 4);
-         src = GenRegister::suboffset(src, 4);
-       }
-       p->pop();
+       p->MOV(dst.bottom_half(), src.bottom_half());
+       p->MOV(dst.top_half(p->simdWidth), src.top_half(p->simdWidth));
       } else if (needToSplitAlu1(p, dst, src) == false) {
        if(compactAlu1(p, opcode, dst, src, condition, false))
          return;
@@ -926,16 +848,8 @@ namespace gbe
  
    void GenEncoder::LOAD_INT64_IMM(GenRegister dest, int64_t value) {
      GenRegister u0 = GenRegister::immd((int)value), u1 = GenRegister::immd(value >> 32);
-    int execWidth = curr.execWidth;
-    push();
-    curr.execWidth = 8;
-    for(int nib = 0; nib < execWidth/4; nib ++) {
-      curr.chooseNib(nib);
-      MOV(dest.top_half(), u1);
-      MOV(dest.bottom_half(), u0);
-      dest = GenRegister::suboffset(dest, 4);
-    }
-    pop();
+    MOV(dest.bottom_half(), u0);
+    MOV(dest.top_half(this->simdWidth), u1);
    }
  
    void GenEncoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister r) {
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp

index bd4a8e7..02661d3 100644 (file)
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -90,6 +90,8 @@ namespace gbe
      uint32_t deviceID;
      /*! The constant for jump. */
      const int jump_width;
+    /*! simd width for this codegen */
+    uint32_t simdWidth;
      ////////////////////////////////////////////////////////////////////////
      // Encoding functions
      ////////////////////////////////////////////////////////////////////////
@@ -168,10 +170,6 @@ namespace gbe
      void WAIT(void);
      /*! Atomic instructions */
      virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
-    /*! Read 64-bits float/int arrays */
-    void READ64(GenRegister dst, GenRegister tmp, GenRegister addr, GenRegister src, uint32_t bti, uint32_t elemNum);
-    /*! Write 64-bits float/int arrays */
-    void WRITE64(GenRegister src, GenRegister data, uint32_t bti, uint32_t elemNum, bool is_scalar);
      /*! Untyped read (upto 4 channels) */
      virtual void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
      /*! Untyped write (upto 4 channels) */
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp

index db20162..7060240 100644 (file)
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -542,9 +542,9 @@ namespace gbe
      /*! Atomic instruction */
      void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, uint32_t bti);
      /*! Read 64 bits float/int array */
-    void READ64(Reg addr, Reg tempAddr, const GenRegister *dst, uint32_t elemNum, uint32_t valueNum, uint32_t bti);
+    void READ64(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
      /*! Write 64 bits float/int array */
-    void WRITE64(Reg addr, const GenRegister *src, uint32_t srcNum, const GenRegister *dst, uint32_t dstNum, uint32_t bti);
+    void WRITE64(Reg addr, const GenRegister *src, uint32_t srcNum, uint32_t bti);
      /*! Untyped read (up to 4 elements) */
      void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
      /*! Untyped write (up to 4 elements) */
@@ -1071,34 +1071,26 @@ namespace gbe
    void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
    void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); }
  
-  /* elemNum contains all the temporary register and the
-     real destination registers.*/
    void Selection::Opaque::READ64(Reg addr,
-                                 Reg tempAddr,
                                   const GenRegister *dst,
                                   uint32_t elemNum,
-                                 uint32_t valueNum,
                                   uint32_t bti)
    {
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, elemNum + 1, 1);
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, elemNum, 1);
      SelectionVector *srcVector = this->appendVector();
      SelectionVector *dstVector = this->appendVector();
  
      // Regular instruction to encode
      for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
        insn->dst(elemID) = dst[elemID];
-    /* temporary addr register is to be modified, set it to dst registers.*/
-    insn->dst(elemNum) = tempAddr;
      insn->src(0) = addr;
      insn->setbti(bti);
-    insn->extra.elem = valueNum;
+    insn->extra.elem = elemNum;
  
-    // Only the temporary registers need contiguous allocation
-    dstVector->regNum = elemNum - valueNum;
+    dstVector->regNum = elemNum;
      dstVector->isSrc = 0;
      dstVector->reg = &insn->dst(0);
  
-    // Source cannot be scalar (yet)
      srcVector->regNum = 1;
      srcVector->isSrc = 1;
      srcVector->reg = &insn->src(0);
@@ -1125,36 +1117,30 @@ namespace gbe
      dstVector->regNum = elemNum;
      dstVector->isSrc = 0;
      dstVector->reg = &insn->dst(0);
-    // Source cannot be scalar (yet)
+
      srcVector->regNum = 1;
      srcVector->isSrc = 1;
      srcVector->reg = &insn->src(0);
    }
  
-  /* elemNum contains all the temporary register and the
-     real data registers.*/
    void Selection::Opaque::WRITE64(Reg addr,
                                    const GenRegister *src,
                                    uint32_t srcNum,
-                                  const GenRegister *dst,
-                                  uint32_t dstNum,
                                    uint32_t bti)
    {
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, dstNum, srcNum + 1);
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, 0, srcNum + 1);
      SelectionVector *vector = this->appendVector();
  
      // Regular instruction to encode
      insn->src(0) = addr;
      for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
        insn->src(elemID + 1) = src[elemID];
-    for (uint32_t elemID = 0; elemID < dstNum; ++elemID)
-      insn->dst(elemID) = dst[elemID];
+
      insn->setbti(bti);
      insn->extra.elem = srcNum;
  
-    // Only the addr + temporary registers need to be contiguous.
-    vector->regNum = dstNum;
-    vector->reg = &insn->dst(0);
+    vector->regNum = srcNum + 1;
+    vector->reg = &insn->src(0);
      vector->isSrc = 1;
    }
  
@@ -2726,18 +2712,13 @@ namespace gbe
      {
        using namespace ir;
        const uint32_t valueNum = insn.getValueNum();
-      uint32_t dstID;
        /* XXX support scalar only right now. */
        GBE_ASSERT(valueNum == 1);
  
-      // The first 16 DWORD register space is for temporary usage at encode stage.
-      uint32_t tmpRegNum = (sel.ctx.getSimdWidth() == 8) ? valueNum * 2 : valueNum;
-      GenRegister dst[valueNum + tmpRegNum];
-      for (dstID = 0; dstID < tmpRegNum ; ++dstID)
-        dst[dstID] = sel.selReg(sel.reg(FAMILY_DWORD));
-      for ( uint32_t valueID = 0; valueID < valueNum; ++dstID, ++valueID)
-        dst[dstID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64);
-      sel.READ64(addr, sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64), dst, valueNum + tmpRegNum, valueNum, bti);
+      GenRegister dst[valueNum];
+      for ( uint32_t dstID = 0; dstID < valueNum; ++dstID)
+        dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_U64);
+      sel.READ64(addr, dst, valueNum, bti);
      }
  
      void emitByteGather(Selection::Opaque &sel,
@@ -2886,22 +2867,14 @@ namespace gbe
      {
        using namespace ir;
        const uint32_t valueNum = insn.getValueNum();
-      uint32_t srcID;
        /* XXX support scalar only right now. */
        GBE_ASSERT(valueNum == 1);
-      addr = GenRegister::retype(addr, GEN_TYPE_F);
-      // The first 16 DWORD register space is for temporary usage at encode stage.
-      uint32_t tmpRegNum = (sel.ctx.getSimdWidth() == 8) ? valueNum * 2 : valueNum;
+      addr = GenRegister::retype(addr, GEN_TYPE_UD);
        GenRegister src[valueNum];
-      GenRegister dst[tmpRegNum + 1];
-      /* dst 0 is for the temporary address register. */
-      dst[0] = sel.selReg(sel.reg(FAMILY_DWORD));
-      for (srcID = 0; srcID < tmpRegNum; ++srcID)
-        dst[srcID + 1] = sel.selReg(sel.reg(FAMILY_DWORD));
  
        for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
          src[valueID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64);
-      sel.WRITE64(addr, src, valueNum, dst, tmpRegNum + 1, bti);
+      sel.WRITE64(addr, src, valueNum, bti);
      }
  
      void emitByteScatter(Selection::Opaque &sel,
@@ -3083,6 +3056,12 @@ namespace gbe
          sel.curr.noMask = 1;
        }
  
+      // As we store long/ulong low/high part separately,
+      // we need to deal with it separately, we need to change it back again
+      // when hardware support native long type.
+      const bool isInt64 = (srcType == TYPE_S64 || srcType == TYPE_U64 || dstType == TYPE_S64 || dstType == TYPE_U64);
+      const int simdWidth = sel.curr.execWidth;
+
        for(int i = 0; i < narrowNum; i++, index++) {
          GenRegister narrowReg, wideReg;
          if(narrowDst) {
@@ -3103,16 +3082,26 @@ namespace gbe
              GBE_ASSERT(multiple == 8);
            }
          }
-        if(index % multiple) {
+
+        if(!isInt64 && index % multiple) {
            wideReg = GenRegister::offset(wideReg, 0, (index % multiple) * typeSize(wideReg.type));
            wideReg.subphysical = 1;
          }
+        if(isInt64) {
+          // offset to next half
+          wideReg.subphysical = 1;
+          if(i >= multiple/2)
+            wideReg = GenRegister::offset(wideReg, 0, sel.isScalarReg(wideReg.reg()) ? 4 : simdWidth*4);
+          if(index % (multiple/2))
+            wideReg = GenRegister::offset(wideReg, 0, (index % (multiple/2)) * typeSize(wideReg.type));
+        }
+
          GenRegister xdst = narrowDst ? narrowReg : wideReg;
          GenRegister xsrc = narrowDst ? wideReg : narrowReg;
  
-        if((srcType == TYPE_S64 || srcType == TYPE_U64 || srcType == TYPE_DOUBLE) ||
-           (dstType == TYPE_S64 || dstType == TYPE_U64 || dstType == TYPE_DOUBLE)) {
-          const int simdWidth = sel.curr.execWidth;
+        if(isInt64) {
+          sel.MOV(xdst, xsrc);
+        } else if(srcType == TYPE_DOUBLE || dstType == TYPE_DOUBLE) {
            sel.push();
              sel.curr.execWidth = 8;
              xdst.subphysical = 1;
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp

index 3d8b0b3..db65851 100644 (file)
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -675,24 +675,36 @@ namespace gbe
            continue;
  
          uint32_t alignment;
-        ir::RegisterFamily family;
-        getRegAttrib(reg, alignment, &family);
-        const uint32_t size = vector->regNum * alignment;
-        const uint32_t grfOffset = allocateReg(interval, size, alignment);
+        uint32_t size = 0;
+        for (uint32_t regID = 0; regID < vector->regNum; ++regID) {
+          getRegAttrib(vector->reg[regID].reg(), alignment, NULL);
+          size += alignment;
+        }
+        // FIXME this is workaround for scheduling limitation, which requires 2*GEN_REG_SIZE under SIMD16.
+        const uint32_t maxAlignment = ctx.getSimdWidth()/8*GEN_REG_SIZE;
+        const uint32_t grfOffset = allocateReg(interval, size, maxAlignment);
          if(grfOffset == 0) {
-          GBE_ASSERT(!(reservedReg && family != ir::FAMILY_DWORD));
+          ir::RegisterFamily family;
            for(int i = vector->regNum-1; i >= 0; i--) {
+            family = ctx.sel->getRegisterFamily(vector->reg[i].reg());
+            // we currently only support DWORD/QWORD spill
+            if(family != ir::FAMILY_DWORD && family != ir::FAMILY_QWORD)
+              return false;
              if (!spillReg(vector->reg[i].reg()))
                return false;
            }
            continue;
          }
+        uint32_t subOffset = 0;
          for (uint32_t regID = 0; regID < vector->regNum; ++regID) {
            const ir::Register reg = vector->reg[regID].reg();
-          GBE_ASSERT(RA.contains(reg) == false
-                     && ctx.sel->getRegisterData(reg).family == family);
-          insertNewReg(reg, grfOffset + alignment * regID, true);
-          ctx.splitBlock(grfOffset, alignment * regID);  //splitBlock will not split if regID == 0
+          GBE_ASSERT(RA.contains(reg) == false);
+          getRegAttrib(reg, alignment, NULL);
+          // check all sub registers aligned correctly
+          GBE_ASSERT((grfOffset + subOffset) % alignment == 0 || (grfOffset + subOffset) % GEN_REG_SIZE == 0);
+          insertNewReg(reg, grfOffset + subOffset, true);
+          ctx.splitBlock(grfOffset, subOffset);  //splitBlock will not split if regID == 0
+          subOffset += alignment;
          }
        }
        // Case 2: This is a regular scalar register, allocate it alone
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp

index 50a6dcd..3967e6e 100644 (file)
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -301,20 +301,25 @@ namespace gbe
        return false;
      }
  
-    INLINE GenRegister top_half(void) const {
-      GenRegister r = bottom_half();
-      r.subnr += 4;
-      r.nr += r.subnr / 32;
-      r.subnr %= 32;
-      return r;
+    INLINE GenRegister top_half(int simdWidth) const {
+      GBE_ASSERT(isint64());
+      GenRegister reg = retype(*this, type == GEN_TYPE_UL ? GEN_TYPE_UD : GEN_TYPE_D);
+
+      if (reg.hstride != GEN_HORIZONTAL_STRIDE_0) {
+        reg.subnr += simdWidth * typeSize(reg.type) * hstride_size(reg);
+        reg.nr += reg.subnr / 32;
+        reg.subnr %= 32;
+      } else {
+        reg.subnr += typeSize(reg.type);
+        reg.nr += reg.subnr/32;
+        reg.subnr %= 32;
+      }
+      return reg;
      }
  
      INLINE GenRegister bottom_half(void) const {
        GBE_ASSERT(isint64());
-      GenRegister r = h2(*this);
-      r.type = type == GEN_TYPE_UL ? GEN_TYPE_UD : GEN_TYPE_D;
-      if(r.vstride != GEN_VERTICAL_STRIDE_0)
-       r.vstride = GEN_VERTICAL_STRIDE_16;
+      GenRegister r = retype(*this, type == GEN_TYPE_UL ? GEN_TYPE_UD : GEN_TYPE_D);
        return r;
      }
  
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp

index 82429d0..bd111b0 100644 (file)
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1868,6 +1868,11 @@ namespace gbe
          uint32_t srcElemNum = 0, dstElemNum = 0 ;
          ir::Type srcType = getVectorInfo(ctx, srcValue->getType(), srcValue, srcElemNum);
          ir::Type dstType = getVectorInfo(ctx, dstValue->getType(), dstValue, dstElemNum);
+        // As long and double are not compatible in register storage
+        // and we do not support double yet, simply put an assert here
+        GBE_ASSERT(!(srcType == ir::TYPE_S64 && dstType == ir::TYPE_DOUBLE));
+        GBE_ASSERT(!(dstType == ir::TYPE_S64 && srcType == ir::TYPE_DOUBLE));
+
          if(srcElemNum > 1 || dstElemNum > 1) {
            // Build the tuple data in the vector
            vector<ir::Register> srcTupleData;
author	Ruiling Song <ruiling.song@intel.com>
	Thu, 29 May 2014 02:29:33 +0000 (10:29 +0800)
committer	Zhigang Gong <zhigang.gong@intel.com>
	Thu, 29 May 2014 06:49:03 +0000 (14:49 +0800)
backend/src/backend/gen_context.cpp		patch \| blob \| history
backend/src/backend/gen_encoder.cpp		patch \| blob \| history
backend/src/backend/gen_encoder.hpp		patch \| blob \| history
backend/src/backend/gen_insn_selection.cpp		patch \| blob \| history
backend/src/backend/gen_reg_allocation.cpp		patch \| blob \| history
backend/src/backend/gen_register.hpp		patch \| blob \| history
backend/src/llvm/llvm_gen_backend.cpp		patch \| blob \| history