Make integer remainder & division arithmetic work
author: Homer Hsing <homer.xing@intel.com>
Tue, 22 Jan 2013 02:59:37 +0000 (10:59 +0800)
committer: Zhigang Gong <zhigang.gong@linux.intel.com>
Wed, 10 Apr 2013 06:52:32 +0000 (14:52 +0800)
The Intel Gen7 GPU cannot perform integer remainder (or division) when exec_width is 16.
However, we currently generate such instructions, so the GPU hangs.
We need to split each such instruction into two instructions with exec_width 8:

before:
(+f0) math intmod(16) g18<1>D g12<8,8,1>D g2.1<0,1,0>D {align1 WE_normal 1Q};

after:
(+f0) math intmod(8) g18<1>D g12<8,8,1>D g2.1<0,1,0>D {align1 WE_normal 1Q};
(+f0) math intmod(8) g19<1>D g13<8,8,1>D g2.1<0,1,0>D {align1 WE_normal 1Q};

Signed-off-by: Homer Hsing <homer.xing@intel.com>
Reviewed-by: Lu Guanqun <guanqun.lu@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
backend/src/backend/gen_encoder.cpp
backend/src/backend/gen_insn_selection.cpp

index 70c7ce8..ed7c256 100644 (file)
@@ -766,6 +766,24 @@ namespace gbe
      this->setDst(insn, dst);
      this->setSrc0(insn, src0);
      this->setSrc1(insn, src1);
+
+     if (function == GEN_MATH_FUNCTION_INT_DIV_QUOTIENT ||
+         function == GEN_MATH_FUNCTION_INT_DIV_REMAINDER) {
+        assert(insn->header.execution_size == GEN_WIDTH_16);
+        insn->header.execution_size = GEN_WIDTH_8;
+
+        GenInstruction *insn2 = this->next(GEN_OPCODE_MATH);
+        GenRegister new_dest, new_src0, new_src1;
+        new_dest = GenRegister::QnPhysical(dst, 1);
+        new_src0 = GenRegister::QnPhysical(src0, 1);
+        new_src1 = GenRegister::QnPhysical(src1, 1);
+        insn2->header.destreg_or_condmod = function;
+        this->setHeader(insn2);
+        insn2->header.execution_size = GEN_WIDTH_8;
+        this->setDst(insn2, new_dest);
+        this->setSrc0(insn2, new_src0);
+        this->setSrc1(insn2, new_src1);
+     }
   }
 
   void GenEncoder::MATH(GenRegister dst, uint32_t function, GenRegister src) {
index f3fdd6f..ecaaeeb 100644 (file)
@@ -1164,6 +1164,16 @@ namespace gbe
         markAllChildren(dag);
         return true;
       }
+      if (opcode == OP_REM) {
+        GenRegister src0 = sel.selReg(insn.getSrc(0), type);
+        GenRegister src1 = sel.selReg(insn.getSrc(1), type);
+        if (type == TYPE_U32 || type == TYPE_S32) {
+          sel.MATH(dst, GEN_MATH_FUNCTION_INT_DIV_REMAINDER, src0, src1);
+          markAllChildren(dag);
+        } else
+          NOT_IMPLEMENTED;
+        return true;
+      }
 
       sel.push();