Add the OpenCL 1.2 built-in function popcount.

author Luo <xionghu.luo@intel.com>

Fri, 10 Oct 2014 03:05:04 +0000 (11:05 +0800)

committer Zhigang Gong <zhigang.gong@intel.com>

Tue, 14 Oct 2014 05:44:13 +0000 (13:44 +0800)
author Luo <xionghu.luo@intel.com>
Fri, 10 Oct 2014 03:05:04 +0000 (11:05 +0800)
committer Zhigang Gong <zhigang.gong@intel.com>
Tue, 14 Oct 2014 05:44:13 +0000 (13:44 +0800)
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c

index 231185a..2412404 100644 (file)
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -67,6 +67,7 @@ static const struct {
    [GEN_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
    [GEN_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 },
    [GEN_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 },
+  [GEN_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1 },
    [GEN_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
    [GEN_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
  
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp

index 991214d..ead455f 100644 (file)
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -205,6 +205,7 @@ namespace gbe
        case SEL_OP_READ_ARF: p->MOV(dst, src); break;
        case SEL_OP_FBH: p->FBH(dst, src); break;
        case SEL_OP_FBL: p->FBL(dst, src); break;
+      case SEL_OP_CBIT: p->CBIT(dst, src); break;
        case SEL_OP_NOT: p->NOT(dst, src); break;
        case SEL_OP_RNDD: p->RNDD(dst, src); break;
        case SEL_OP_RNDU: p->RNDU(dst, src); break;
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp

index b40096d..385fdfa 100644 (file)
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -161,6 +161,7 @@ enum opcode {
    GEN_OPCODE_LZD = 74,
    GEN_OPCODE_FBH = 75,
    GEN_OPCODE_FBL = 76,
+  GEN_OPCODE_CBIT = 77,
    GEN_OPCODE_ADDC = 78,
    GEN_OPCODE_SUBB = 79,
    GEN_OPCODE_SAD2 = 80,
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp

index f799ad7..847ab7b 100644 (file)
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -685,6 +685,7 @@ namespace gbe
    ALU1(RNDU)
    ALU1(FBH)
    ALU1(FBL)
+  ALU1(CBIT)
    ALU1(F16TO32)
    ALU1(F32TO16)
    ALU2(SEL)
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp

index 0b6154d..3df7a57 100644 (file)
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -99,6 +99,7 @@ namespace gbe
      ALU1(MOV)
      ALU1(FBH)
      ALU1(FBL)
+    ALU1(CBIT)
      ALU2(SUBB)
      ALU2(UPSAMPLE_SHORT)
      ALU2(UPSAMPLE_INT)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp

index b2df76f..2b7bf47 100644 (file)
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -495,6 +495,7 @@ namespace gbe
      ALU2WithTemp(MUL_HI)
      ALU1(FBH)
      ALU1(FBL)
+    ALU1(CBIT)
      ALU2WithTemp(HADD)
      ALU2WithTemp(RHADD)
      ALU2(UPSAMPLE_SHORT)
@@ -1869,7 +1870,7 @@ namespace gbe
      static ir::Type getType(const ir::Opcode opcode, const ir::Type insnType) {
        if (insnType == ir::TYPE_S64 || insnType == ir::TYPE_U64 || insnType == ir::TYPE_S8 || insnType == ir::TYPE_U8)
          return insnType;
-      if (opcode == ir::OP_FBH || opcode == ir::OP_FBL)
+      if (opcode == ir::OP_FBH || opcode == ir::OP_FBL || opcode == ir::OP_CBIT)
          return ir::TYPE_U32;
        if (insnType == ir::TYPE_S16 || insnType == ir::TYPE_U16)
          return insnType;
@@ -1923,6 +1924,7 @@ namespace gbe
            case ir::OP_RNDZ: sel.RNDZ(dst, src); break;
            case ir::OP_FBH: sel.FBH(dst, src); break;
            case ir::OP_FBL: sel.FBL(dst, src); break;
+          case ir::OP_CBIT: sel.CBIT(dst, src); break;
            case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break;
            case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break;
            case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx

index 7511b84..d80dc58 100644 (file)
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -68,6 +68,7 @@ DECL_SELECTION_IR(MUL_HI, BinaryWithTempInstruction)
  DECL_SELECTION_IR(I64_MUL_HI, I64MULHIInstruction)
  DECL_SELECTION_IR(FBH, UnaryInstruction)
  DECL_SELECTION_IR(FBL, UnaryInstruction)
+DECL_SELECTION_IR(CBIT, UnaryInstruction)
  DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
  DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction)
  DECL_SELECTION_IR(I64HADD, I64HADDInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp

index e4e30ed..6c37f29 100644 (file)
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1594,6 +1594,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex
    DECL_EMIT_FUNCTION(MOV)
    DECL_EMIT_FUNCTION(FBH)
    DECL_EMIT_FUNCTION(FBL)
+  DECL_EMIT_FUNCTION(CBIT)
    DECL_EMIT_FUNCTION(COS)
    DECL_EMIT_FUNCTION(SIN)
    DECL_EMIT_FUNCTION(LOG)
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp

index 6807615..1c31171 100644 (file)
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -585,6 +585,8 @@ namespace ir {
    Instruction FBH(Type type, Register dst, Register src);
    /*! fbl.type dst src */
    Instruction FBL(Type type, Register dst, Register src);
+  /*! cbit.type dst src */
+  Instruction CBIT(Type type, Register dst, Register src);
    /*! hadd.type dst src */
    Instruction HADD(Type type, Register dst, Register src0, Register src1);
    /*! rhadd.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx

index 5fed286..9a89069 100644 (file)
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -86,6 +86,7 @@ DECL_INSN(MUL_HI, BinaryInstruction)
  DECL_INSN(I64_MUL_HI, BinaryInstruction)
  DECL_INSN(FBH, UnaryInstruction)
  DECL_INSN(FBL, UnaryInstruction)
+DECL_INSN(CBIT, UnaryInstruction)
  DECL_INSN(HADD, BinaryInstruction)
  DECL_INSN(RHADD, BinaryInstruction)
  DECL_INSN(I64HADD, BinaryInstruction)
diff --git a/backend/src/libocl/script/ocl_integer.def b/backend/src/libocl/script/ocl_integer.def

index ec9177a..c35c242 100644 (file)
--- a/backend/src/libocl/script/ocl_integer.def
+++ b/backend/src/libocl/script/ocl_integer.def
@@ -23,8 +23,7 @@ uintn upsample (ushortn hi, ushortn lo)
  longn upsample (intn hi, uintn lo)
  ulongn upsample (uintn hi, uintn lo)
  
-# XXX not implemented
-#gentype popcount (gentype x)
+gentype popcount (gentype x)
  
  ##fast_integer
  gentype mad24 (gentype x, gentype y, gentype z)
diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl

index 9230604..375a40f 100644 (file)
--- a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
@@ -19,6 +19,7 @@
  
  PURE CONST uint __gen_ocl_fbh(uint);
  PURE CONST uint __gen_ocl_fbl(uint);
+PURE CONST uint __gen_ocl_cbit(uint);
  
  OVERLOADABLE char clz(char x) {
    if (x < 0)
@@ -86,6 +87,35 @@ OVERLOADABLE ulong clz(ulong x) {
    return v;
  }
  
+OVERLOADABLE char popcount(char x) {  // popcount for a signed 8-bit value, built on the uint-only __gen_ocl_cbit
+  return x == 0 ? 0 : x < 0?__gen_ocl_cbit(x) - 24 : __gen_ocl_cbit(x);  // a negative x sign-extends when converted to uint, setting the upper 24 bits; subtract them back out
+}
+OVERLOADABLE short popcount(short x) {  // popcount for a signed 16-bit value, built on the uint-only __gen_ocl_cbit
+  return x == 0 ? 0 : x < 0?__gen_ocl_cbit(x) - 16 : __gen_ocl_cbit(x);  // a negative x sign-extends when converted to uint, setting the upper 16 bits; subtract them back out
+}
+#define SDEF(TYPE)        \
+OVERLOADABLE TYPE popcount(TYPE x){ return x == 0? 0:__gen_ocl_cbit(x);}  // zero short-circuits to 0; NOTE(review): guard is presumably redundant if cbit(0) is 0 -- confirm
+SDEF(uchar);  // unsigned 8-bit: zero-extends to uint, so no correction term is needed
+SDEF(ushort);  // unsigned 16-bit: zero-extends to uint, so no correction term is needed
+SDEF(int);  // already 32 bits wide; conversion to uint keeps the bit pattern
+SDEF(uint);  // matches the __gen_ocl_cbit parameter type directly
+#undef SDEF
+
+OVERLOADABLE long popcount(long x) {  // 64-bit signed popcount, computed as the sum of two 32-bit popcounts
+  union { int i[2]; long x; } u;  // overlays the 64-bit value with two 32-bit ints
+  u.x = x;
+  uint v = popcount(u.i[1]);  // count for one 32-bit half, via the int overload
+  v += popcount(u.i[0]);  // add the count for the other half
+  return v;  // the uint sum is converted to the long return type
+}
+
+OVERLOADABLE ulong popcount(ulong x) {  // 64-bit unsigned popcount, computed as the sum of two 32-bit popcounts
+  union { uint i[2]; ulong x; } u;  // overlays the 64-bit value with two 32-bit uints
+  u.x = x;
+  uint v = popcount(u.i[1]);  // count for one 32-bit half, via the uint overload
+  v += popcount(u.i[0]);  // add the count for the other half
+  return v;  // the uint sum is converted to the ulong return type
+}
  
  // sat
  #define SDEF(TYPE)        \
diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.h b/backend/src/libocl/tmpl/ocl_integer.tmpl.h

index 2869bb6..0b3dea4 100644 (file)
--- a/backend/src/libocl/tmpl/ocl_integer.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.h
@@ -45,6 +45,15 @@ OVERLOADABLE uint clz(uint x);
  OVERLOADABLE long clz(long x);
  OVERLOADABLE ulong clz(ulong x);
  
+OVERLOADABLE char popcount(char x);  // popcount prototypes: one overload per scalar integer type, each returning its argument's type
+OVERLOADABLE uchar popcount(uchar x);
+OVERLOADABLE short popcount(short x);
+OVERLOADABLE ushort popcount(ushort x);
+OVERLOADABLE int popcount(int x);
+OVERLOADABLE uint popcount(uint x);
+OVERLOADABLE long popcount(long x);
+OVERLOADABLE ulong popcount(ulong x);
+
  OVERLOADABLE char mul_hi(char x, char y);
  OVERLOADABLE uchar mul_hi(uchar x, uchar y);
  OVERLOADABLE short mul_hi(short x, short y);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp

index 39b441f..39e22d7 100644 (file)
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2494,6 +2494,7 @@ namespace gbe
          regTranslator.newScalarProxy(ir::ocl::workdim, dst); break;
        case GEN_OCL_FBH:
        case GEN_OCL_FBL:
+      case GEN_OCL_CBIT:
        case GEN_OCL_COS:
        case GEN_OCL_SIN:
        case GEN_OCL_SQR:
@@ -2779,6 +2780,7 @@ namespace gbe
            }
            case GEN_OCL_FBH: this->emitUnaryCallInst(I,CS,ir::OP_FBH); break;
            case GEN_OCL_FBL: this->emitUnaryCallInst(I,CS,ir::OP_FBL); break;
+          case GEN_OCL_CBIT: this->emitUnaryCallInst(I,CS,ir::OP_CBIT); break;
            case GEN_OCL_ABS:
            {
              const ir::Register src = this->getRegister(*AI);
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx

index f508bcc..7434c78 100644 (file)
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -151,6 +151,7 @@ DECL_LLVM_GEN_FUNCTION(I64RHADD, _Z15__gen_ocl_rhaddmm)
  DECL_LLVM_GEN_FUNCTION(UPSAMPLE_SHORT, _Z18__gen_ocl_upsampless)
  DECL_LLVM_GEN_FUNCTION(UPSAMPLE_INT, _Z18__gen_ocl_upsampleii)
  DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell)
+DECL_LLVM_GEN_FUNCTION(CBIT, __gen_ocl_cbit)
  
  // saturate convert
  DECL_LLVM_GEN_FUNCTION(SAT_CONV_U8_TO_I8,  _Z16convert_char_sath)
author	Luo <xionghu.luo@intel.com>
	Fri, 10 Oct 2014 03:05:04 +0000 (11:05 +0800)
committer	Zhigang Gong <zhigang.gong@intel.com>
	Tue, 14 Oct 2014 05:44:13 +0000 (13:44 +0800)
backend/src/backend/gen/gen_mesa_disasm.c		patch \| blob \| history
backend/src/backend/gen_context.cpp		patch \| blob \| history
backend/src/backend/gen_defs.hpp		patch \| blob \| history
backend/src/backend/gen_encoder.cpp		patch \| blob \| history
backend/src/backend/gen_encoder.hpp		patch \| blob \| history
backend/src/backend/gen_insn_selection.cpp		patch \| blob \| history
backend/src/backend/gen_insn_selection.hxx		patch \| blob \| history
backend/src/ir/instruction.cpp		patch \| blob \| history
backend/src/ir/instruction.hpp		patch \| blob \| history
backend/src/ir/instruction.hxx		patch \| blob \| history
backend/src/libocl/script/ocl_integer.def		patch \| blob \| history
backend/src/libocl/tmpl/ocl_integer.tmpl.cl		patch \| blob \| history
backend/src/libocl/tmpl/ocl_integer.tmpl.h		patch \| blob \| history
backend/src/llvm/llvm_gen_backend.cpp		patch \| blob \| history
backend/src/llvm/llvm_gen_ocl_function.hxx		patch \| blob \| history