--- /dev/null
+#include "stdlib.h"
+__kernel void extract(__global int4 *dst, __global int4 *src, int c)
+{
+ const int4 from = src[0];
+ dst[0] = (int4)(from.x, 1, 2, 3);
+}
+
--- /dev/null
+; ModuleID = 'extract.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @extract(<4 x i32>* nocapture %dst, <4 x i32>* nocapture %src, i32 %c) nounwind noinline {
+entry:
+ %0 = load <4 x i32>* %src, align 16, !tbaa !1
+ %1 = extractelement <4 x i32> %0, i32 0
+ %vecinit = insertelement <4 x i32> undef, i32 %1, i32 0
+ %vecinit1 = insertelement <4 x i32> %vecinit, i32 1, i32 1
+ %vecinit2 = insertelement <4 x i32> %vecinit1, i32 2, i32 2
+ %vecinit3 = insertelement <4 x i32> %vecinit2, i32 3, i32 3
+ store <4 x i32> %vecinit3, <4 x i32>* %dst, align 16, !tbaa !1
+ ret void
+}
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (<4 x i32>*, <4 x i32>*, i32)* @extract}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
#include "stdlib.h"
__kernel void insert(__global int4 *dst, __global int4 *src, int c)
{
- dst[0].x = dst[0][c];
- dst[0].yzw = dst[1].xyz + src[0].xyz;
+ int4 x = src[0];
+ src[0].z = 1.f;
+ dst[0] = src[0];
}
define ptx_kernel void @insert(<4 x i32>* nocapture %dst, <4 x i32>* nocapture %src, i32 %c) nounwind noinline {
entry:
- %0 = load <4 x i32>* %dst, align 16, !tbaa !1
- %vecext = extractelement <4 x i32> %0, i32 %c
- %1 = insertelement <4 x i32> %0, i32 %vecext, i32 0
- store <4 x i32> %1, <4 x i32>* %dst, align 16
- %arrayidx2 = getelementptr inbounds <4 x i32>* %dst, i32 1
- %2 = load <4 x i32>* %arrayidx2, align 16
- %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- %4 = load <4 x i32>* %src, align 16
- %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- %add = add <3 x i32> %3, %5
- %6 = shufflevector <3 x i32> %add, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
- %7 = shufflevector <4 x i32> %1, <4 x i32> %6, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
- store <4 x i32> %7, <4 x i32>* %dst, align 16
+ %0 = load <4 x i32>* %src, align 16
+ %1 = insertelement <4 x i32> %0, i32 1, i32 2
+ store <4 x i32> %1, <4 x i32>* %src, align 16
+ store <4 x i32> %1, <4 x i32>* %dst, align 16, !tbaa !1
ret void
}
--- /dev/null
+#include "stdlib.h"
+
+__kernel void test_select(__global int4 *dst,
+ __global int4 *src0,
+ __global int4 *src1)
+{
+ const int4 from = select(src0[0], src0[1], src0[1]);
+ dst[0] = from;
+}
--- /dev/null
+; ModuleID = 'select.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @test_select(<4 x i32>* nocapture %dst, <4 x i32>* nocapture %src0, <4 x i32>* nocapture %src1) nounwind noinline {
+entry:
+ %0 = load <4 x i32>* %src0, align 16, !tbaa !1
+ %arrayidx1 = getelementptr inbounds <4 x i32>* %src0, i32 1
+ %1 = load <4 x i32>* %arrayidx1, align 16, !tbaa !1
+ %2 = extractelement <4 x i32> %0, i32 0
+ %3 = extractelement <4 x i32> %1, i32 0
+ %4 = extractelement <4 x i32> %0, i32 1
+ %5 = extractelement <4 x i32> %1, i32 1
+ %6 = extractelement <4 x i32> %0, i32 2
+ %7 = extractelement <4 x i32> %1, i32 2
+ %8 = extractelement <4 x i32> %0, i32 3
+ %9 = extractelement <4 x i32> %1, i32 3
+ %tobool.i = icmp slt i32 %3, 0
+ %cond1.i = select i1 %tobool.i, i32 %3, i32 %2
+ %10 = insertelement <4 x i32> undef, i32 %cond1.i, i32 0
+ %tobool3.i = icmp slt i32 %5, 0
+ %cond7.i = select i1 %tobool3.i, i32 %5, i32 %4
+ %11 = insertelement <4 x i32> %10, i32 %cond7.i, i32 1
+ %tobool9.i = icmp slt i32 %7, 0
+ %cond13.i = select i1 %tobool9.i, i32 %7, i32 %6
+ %12 = insertelement <4 x i32> %11, i32 %cond13.i, i32 2
+ %tobool15.i = icmp slt i32 %9, 0
+ %cond19.i = select i1 %tobool15.i, i32 %9, i32 %8
+ %13 = insertelement <4 x i32> %12, i32 %cond19.i, i32 3
+ store <4 x i32> %13, <4 x i32>* %dst, align 16, !tbaa !1
+ ret void
+}
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (<4 x i32>*, <4 x i32>*, <4 x i32>*)* @test_select}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
--- /dev/null
+#include "stdlib.h"
+__kernel void shuffle(__global int4 *dst, __global int4 *src, int c)
+{
+ const int4 from = src[0];
+ dst[0] = from.xywz;
+}
+
--- /dev/null
+; ModuleID = 'shuffle.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @shuffle(<4 x i32>* nocapture %dst, <4 x i32>* nocapture %src, i32 %c) nounwind noinline {
+entry:
+ %0 = load <4 x i32>* %src, align 16, !tbaa !1
+ %1 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
+ store <4 x i32> %1, <4 x i32>* %dst, align 16, !tbaa !1
+ ret void
+}
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (<4 x i32>*, <4 x i32>*, i32)* @shuffle}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
else return 0;
}
+__attribute__((overloadable)) inline unsigned select(unsigned src0, unsigned src1, unsigned cond) {
+ return cond ? src0 : src1;
+}
+
+__attribute__((overloadable)) inline int select(int src0, int src1, int cond) {
+ return cond ? src0 : src1;
+}
+
typedef float float2 __attribute__((ext_vector_type(2)));
typedef float float3 __attribute__((ext_vector_type(3)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef int int2 __attribute__((ext_vector_type(2)));
typedef int int3 __attribute__((ext_vector_type(3)));
typedef int int4 __attribute__((ext_vector_type(4)));
+typedef int uint2 __attribute__((ext_vector_type(2)));
+typedef unsigned uint3 __attribute__((ext_vector_type(3)));
+typedef unsigned uint4 __attribute__((ext_vector_type(4)));
typedef bool bool2 __attribute__((ext_vector_type(2)));
typedef bool bool3 __attribute__((ext_vector_type(3)));
typedef bool bool4 __attribute__((ext_vector_type(4)));
-#define DECL_SELECT(TYPE) \
-__attribute__((overloadable)) \
-inline TYPE select(bool b, TYPE x, TYPE y) { \
- if (b) return x; else return y; \
+__attribute__((overloadable)) inline int4 select(int4 src0, int4 src1, int4 cond) {
+ int4 dst;
+ const int x0 = src0.x; // Fix performance issue with CLANG
+ const int x1 = src1.x;
+ const int y0 = src0.y;
+ const int y1 = src1.y;
+ const int z0 = src0.z;
+ const int z1 = src1.z;
+ const int w0 = src0.w;
+ const int w1 = src1.w;
+
+ dst.x = (cond.x & 0x80000000) ? x1 : x0;
+ dst.y = (cond.y & 0x80000000) ? y1 : y0;
+ dst.z = (cond.z & 0x80000000) ? z1 : z0;
+ dst.w = (cond.w & 0x80000000) ? w1 : w0;
+ return dst;
}
-#define DECL_SELECT_ALL(TYPE) \
- DECL_SELECT(TYPE) \
- DECL_SELECT(TYPE##2) \
- DECL_SELECT(TYPE##3) \
- DECL_SELECT(TYPE##4)
-DECL_SELECT_ALL(int)
-DECL_SELECT_ALL(float)
-#undef DECL_SELECT_ALL
-#undef DECL_SELECT
INLINE PointerSize getPointerSize(void) const {
return unit.getPointerSize();
}
- /*! MAD with sources directly specified */
- INLINE void MAD(Type type,
- Register dst,
- Register src0,
- Register src1,
- Register src2)
- {
- const Tuple index = this->tuple(src0, src1, src2);
- return this->MAD(type, dst, index);
+
+#define DECL_THREE_SRC_INSN(NAME) \
+ INLINE void NAME(Type type, \
+ Register dst, \
+ Register src0, \
+ Register src1, \
+ Register src2) \
+ { \
+ const Tuple index = this->tuple(src0, src1, src2); \
+ return this->NAME(type, dst, index); \
}
+ DECL_THREE_SRC_INSN(MAD);
+ DECL_THREE_SRC_INSN(SEL);
+#undef DECL_THREE_SRC_INSN
/*! LOAD with the destinations directly specified */
template <typename... Args>
Tuple src; //!< 3 sources do not fit in 8 bytes -> use a tuple
};
+ /*! As for MADs, three sources mean we need a tuple to encode it */
+ class ALIGNED_INSTRUCTION SelectInstruction :
+ public BasePolicy
+ {
+ public:
+ SelectInstruction(Type type,
+ Register dst,
+ Tuple src)
+ {
+ this->opcode = OP_SEL;
+ this->type = type;
+ this->dst = dst;
+ this->src = src;
+ }
+ INLINE uint32_t getSrcNum(void) const { return 3; }
+ INLINE uint32_t getDstNum(void) const { return 1; }
+ INLINE Register getDstIndex(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID == 0, "Only one destination for the instruction");
+ return dst;
+ }
+ INLINE Register getSrcIndex(const Function &fn, uint32_t ID) const {
+ GBE_ASSERTM(ID < 3, "Out-of-bound source register");
+ return fn.getRegister(src, ID);
+ }
+ INLINE Type getType(void) const { return this->type; }
+ INLINE bool wellFormed(const Function &fn, std::string &whyNot) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ Type type; //!< Type of the instruction
+ Register dst; //!< Dst is the register index
+ Tuple src; //!< 3 sources do not fit in 8 bytes -> use a tuple
+ };
+
/*! Comparison instructions take two sources of the same type and return a
* boolean value. Since it is pretty similar to binary instruction, we
* steal all the methods from it, except wellFormed (dst register is always
whyNot = "Out-of-bound index for ternary instruction";
return false;
}
- for (uint32_t srcID = 0; srcID < 3u; ++srcID) {
+ for (uint32_t srcID = 0; srcID < 3; ++srcID) {
+ const Register regID = fn.getRegister(src, srcID);
+ if (UNLIKELY(checkRegisterData(family, regID, fn, whyNot) == false))
+ return false;
+ }
+ return true;
+ }
+
+ // First source must a boolean. Other must match the destination type
+ INLINE bool SelectInstruction::wellFormed(const Function &fn, std::string &whyNot) const
+ {
+ const RegisterData::Family family = getFamily(this->type);
+ if (UNLIKELY(checkRegisterData(family, dst, fn, whyNot) == false))
+ return false;
+ if (UNLIKELY(src + 3u > fn.tupleNum())) {
+ whyNot = "Out-of-bound index for ternary instruction";
+ return false;
+ }
+ const Register regID = fn.getRegister(src, 0);
+ if (UNLIKELY(checkRegisterData(RegisterData::BOOL, regID, fn, whyNot) == false))
+ return false;
+ for (uint32_t srcID = 1; srcID < 3; ++srcID) {
const Register regID = fn.getRegister(src, srcID);
if (UNLIKELY(checkRegisterData(family, regID, fn, whyNot) == false))
return false;
out << " %" << this->getSrcIndex(fn, i);
}
+ template <typename T>
+ static void ternaryOrSelectOut(const T &insn, std::ostream &out, const Function &fn) {
+ insn.outOpcode(out);
+ out << "." << insn.getType()
+ << " %" << insn.getDstIndex(fn, 0)
+ << " %" << insn.getSrcIndex(fn, 0)
+ << " %" << insn.getSrcIndex(fn, 1)
+ << " %" << insn.getSrcIndex(fn, 2);
+ }
+
INLINE void TernaryInstruction::out(std::ostream &out, const Function &fn) const {
- this->outOpcode(out);
- out << "." << this->getType()
- << " %" << this->getDstIndex(fn, 0)
- << " %" << this->getSrcIndex(fn, 0)
- << " %" << this->getSrcIndex(fn, 1)
- << " %" << this->getSrcIndex(fn, 2);
+ ternaryOrSelectOut(*this, out, fn);
+ }
+
+ INLINE void SelectInstruction::out(std::ostream &out, const Function &fn) const {
+ ternaryOrSelectOut(*this, out, fn);
}
INLINE void ConvertInstruction::out(std::ostream &out, const Function &fn) const {
DECL_MEM_FN(UnaryInstruction, Type, getType(void), getType())
DECL_MEM_FN(BinaryInstruction, Type, getType(void), getType())
DECL_MEM_FN(TernaryInstruction, Type, getType(void), getType())
+DECL_MEM_FN(SelectInstruction, Type, getType(void), getType())
DECL_MEM_FN(CompareInstruction, Type, getType(void), getType())
DECL_MEM_FN(ConvertInstruction, Type, getSrcType(void), getSrcType())
DECL_MEM_FN(ConvertInstruction, Type, getDstType(void), getDstType())
return insn.convert();
}
+ // SEL
+ Instruction SEL(Type type, Register dst, Tuple src) {
+ internal::SelectInstruction insn(type, dst, src);
+ return insn.convert();
+ }
+
// All compare functions
#define DECL_EMIT_FUNCTION(NAME) \
Instruction NAME(Type type, Register dst, Register src0, Register src1) { \
static bool isClassOf(const Instruction &insn);
};
+ /*! Select instructions writes src0 to dst if cond is true. Otherwise, it
+ * writes src1
+ */
+ class SelectInstruction {
+ public:
+ /*! Get the type of both sources */
+ Type getType(void) const;
+ /*! Return true if the given instruction is an instance of this class */
+ static bool isClassOf(const Instruction &insn);
+ };
+
/*! Compare instructions compare anything from the same type and return a
* boolean value
*/
Instruction XOR(Type type, Register dst, Register src0, Register src1);
/*! and.type dst src0 src1 */
Instruction AND(Type type, Register dst, Register src0, Register src1);
- /*! mad.type dst {src0, src1, src2} == src */
+ /*! mad.type dst {src0, src1, src2} (== src) */
Instruction MAD(Type type, Register dst, Tuple src);
+ /*! sel.type dst {cond, src0, src1} (== src) */
+ Instruction SEL(Type type, Register dst, Tuple src);
/*! eq.type dst src0 src1 */
Instruction EQ(Type type, Register dst, Register src0, Register src1);
/*! ne.type dst src0 src1 */
DECL_INSN(XOR, BinaryInstruction)
DECL_INSN(AND, BinaryInstruction)
DECL_INSN(MAD, TernaryInstruction)
+DECL_INSN(SEL, SelectInstruction)
DECL_INSN(EQ, CompareInstruction)
DECL_INSN(NE, CompareInstruction)
DECL_INSN(LE, CompareInstruction)
GBE_ASSERT(scalarMap.find(key) != scalarMap.end());
return scalarMap[key];
}
-
+ /*! Insert a given register at given Value position */
+ void insertRegister(const ir::Register ®, Value *value, uint32_t index) {
+ const auto key = std::make_pair(value, index);
+ GBE_ASSERT(scalarMap.find(key) == scalarMap.end());
+ scalarMap[key] = reg;
+ }
private:
- /*! This maps a scalar register to a Value (index is the vector index when
+ /*! This creates a scalar register for a Value (index is the vector index when
* the value is a vector of scalars)
*/
ir::Register newScalar(Value *value, Type *type, uint32_t index) {
- const auto key = std::make_pair(value, index);
- GBE_ASSERT(scalarMap.find(key) == scalarMap.end());
const ir::RegisterData::Family family = getFamily(ctx, type);
const ir::Register reg = ctx.reg(family);
- scalarMap[key] = reg;
+ this->insertRegister(reg, value, index);
return reg;
}
/*! Indices will be zero for scalar values */
DECL_VISIT_FN(CallInst, CallInst);
DECL_VISIT_FN(ICmpInst, ICmpInst);
DECL_VISIT_FN(FCmpInst, FCmpInst);
+ DECL_VISIT_FN(InsertElement, InsertElementInst);
+ DECL_VISIT_FN(ExtractElement, ExtractElementInst);
+ DECL_VISIT_FN(ShuffleVectorInst, ShuffleVectorInst);
+ DECL_VISIT_FN(SelectInst, SelectInst);
#undef DECL_VISIT_FN
// Must be implemented later
- void visitInsertElementInst(InsertElementInst &I) {NOT_SUPPORTED;}
- void visitExtractElementInst(ExtractElementInst &I) {NOT_SUPPORTED;}
- void visitShuffleVectorInst(ShuffleVectorInst &SVI) {NOT_SUPPORTED;}
void visitPHINode(PHINode &I) {NOT_SUPPORTED;}
void visitBranchInst(BranchInst &I) {NOT_SUPPORTED;}
- void visitSelectInst(SelectInst &I) {NOT_SUPPORTED;}
// These instructions are not supported at all
void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;}
return false;
}
- ir::ImmediateIndex GenWriter::newImmediate(Constant *CPV) {
+ template <typename U, typename T>
+ static U processConstant(Constant *CPV, T doIt)
+ {
if (dyn_cast<ConstantExpr>(CPV))
GBE_ASSERTM(false, "Unsupported constant expression");
else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType())
Type* Ty = CI->getType();
if (Ty == Type::getInt1Ty(CPV->getContext())) {
const bool b = CI->getZExtValue();
- return ctx.newImmediate(b);
+ return doIt(b);
} else if (Ty == Type::getInt8Ty(CPV->getContext())) {
const uint8_t u8 = CI->getZExtValue();
- return ctx.newImmediate(u8);
+ return doIt(u8);
} else if (Ty == Type::getInt16Ty(CPV->getContext())) {
const uint16_t u16 = CI->getZExtValue();
- return ctx.newImmediate(u16);
+ return doIt(u16);
} else if (Ty == Type::getInt32Ty(CPV->getContext())) {
const uint32_t u32 = CI->getZExtValue();
- return ctx.newImmediate(u32);
+ return doIt(u32);
} else if (Ty == Type::getInt64Ty(CPV->getContext())) {
const uint64_t u64 = CI->getZExtValue();
- return ctx.newImmediate(u64);
+ return doIt(u64);
} else {
GBE_ASSERTM(false, "Unsupported integer size");
- return ctx.newImmediate(uint64_t(0));
+ return doIt(uint64_t(0));
}
}
ConstantFP *FPC = cast<ConstantFP>(CPV);
if (FPC->getType() == Type::getFloatTy(CPV->getContext())) {
const float f32 = FPC->getValueAPF().convertToFloat();
- return ctx.newImmediate(f32);
+ return doIt(f32);
} else {
const double f64 = FPC->getValueAPF().convertToDouble();
- return ctx.newImmediate(f64);
+ return doIt(f64);
}
}
break;
default:
GBE_ASSERTM(false, "Unsupported constant type");
}
- return ctx.newImmediate(uint64_t(0));
+ const uint64_t imm(8);
+ return doIt(imm);
+ }
+
+ /*! Pfff. I cannot use a lambda, since it is templated. Congratulation c++ */
+ struct NewImmediateFunctor
+ {
+ NewImmediateFunctor(ir::Context &ctx) : ctx(ctx) {}
+ template <typename T> ir::ImmediateIndex operator() (const T &t) {
+ return ctx.newImmediate(t);
+ }
+ ir::Context &ctx;
+ };
+
+ ir::ImmediateIndex GenWriter::newImmediate(Constant *CPV) {
+ return processConstant<ir::ImmediateIndex>(CPV, NewImmediateFunctor(ctx));
}
void GenWriter::newRegister(Value *value) {
}
void GenWriter::emitBinaryOperator(Instruction &I) {
- GBE_ASSERT(I.getType()->isPointerTy() == false ||
+ GBE_ASSERT(I.getType()->isPointerTy() == false &&
I.getType() != Type::getInt1Ty(I.getContext()));
// Get the element type for a vector
}
}
- void GenWriter::regAllocateCastInst(CastInst &I)
- {
+ void GenWriter::regAllocateCastInst(CastInst &I) {
Value *dstValue = &I;
Value *srcValue = I.getOperand(0);
};
}
+ /*! Once again, it is a templated functor. No lambda */
+ struct InsertExtractFunctor {
+ InsertExtractFunctor(ir::Context &ctx) : ctx(ctx) {}
+ template <typename T> ir::Immediate operator() (const T &t) {
+ return ir::Immediate(t);
+ }
+ ir::Context &ctx;
+ };
+
+ void GenWriter::regAllocateInsertElement(InsertElementInst &I) {
+ Value *modified = I.getOperand(0);
+ Value *toInsert = I.getOperand(1);
+ Value *index = I.getOperand(2);
+ GBE_ASSERTM(!isa<Constant>(modified) || isa<UndefValue>(modified),
+ "TODO SUPPORT constant vector for insert");
+ Constant *CPV = dyn_cast<Constant>(index);
+ GBE_ASSERTM(CPV != NULL, "only constant indices when inserting values");
+ auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
+ GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32,
+ "Invalid index type for InsertElement");
+
+ // Crash on overrun
+ VectorType *vectorType = cast<VectorType>(modified->getType());
+ const uint32_t elemNum = vectorType->getNumElements();
+ const uint32_t modifiedID = x.data.u32;
+ GBE_ASSERTM(modifiedID < elemNum, "Out-of-bound index for InsertElement");
+
+ // Non modified values are just proxies
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+ if (elemID != modifiedID)
+ regTranslator.newValueProxy(modified, &I, elemID, elemID);
+
+ // If the element to insert is an immediate we will generate a LOADI.
+ // Otherwise, the value is just a proxy of the inserted value
+ if (dyn_cast<Constant>(toInsert) != NULL) {
+ const ir::Type type = getType(ctx, toInsert->getType());
+ const ir::Register reg = ctx.reg(getFamily(type));
+ regTranslator.insertRegister(reg, &I, modifiedID);
+ } else
+ regTranslator.newValueProxy(toInsert, &I, 0, modifiedID);
+ }
+
+ void GenWriter::emitInsertElement(InsertElementInst &I) {
+ // Note that we check everything in regAllocateInsertElement
+ Value *toInsert = I.getOperand(1);
+ Value *index = I.getOperand(2);
+
+ // If this is not a constant, we just use a proxy
+ if (dyn_cast<Constant>(toInsert) == NULL)
+ return;
+
+ // We need a LOADI if we insert a immediate
+ Constant *indexCPV = dyn_cast<Constant>(index);
+ Constant *toInsertCPV = dyn_cast<Constant>(toInsert);
+ auto x = processConstant<ir::Immediate>(indexCPV, InsertExtractFunctor(ctx));
+ const uint32_t modifiedID = x.data.u32;
+ const ir::ImmediateIndex immIndex = this->newImmediate(toInsertCPV);
+ const ir::Immediate imm = ctx.getImmediate(immIndex);
+ const ir::Register reg = regTranslator.getScalar(&I, modifiedID);
+ ctx.LOADI(imm.type, reg, immIndex);
+ }
+
+ void GenWriter::regAllocateExtractElement(ExtractElementInst &I) {
+ Value *extracted = I.getOperand(0);
+ Value *index = I.getOperand(1);
+ GBE_ASSERTM(isa<Constant>(extracted) == false,
+ "TODO SUPPORT constant vector for extract");
+ Constant *CPV = dyn_cast<Constant>(index);
+ GBE_ASSERTM(CPV != NULL, "only constant indices when inserting values");
+ auto x = processConstant<ir::Immediate>(CPV, InsertExtractFunctor(ctx));
+ GBE_ASSERTM(x.type == ir::TYPE_U32 || x.type == ir::TYPE_S32,
+ "Invalid index type for InsertElement");
+
+ // Crash on overrun
+ VectorType *vectorType = cast<VectorType>(extracted->getType());
+ const uint32_t elemNum = vectorType->getNumElements();
+ const uint32_t extractedID = x.data.u32;
+ GBE_ASSERTM(extractedID < elemNum, "Out-of-bound index for InsertElement");
+
+ // Easy when the vector is not immediate
+ regTranslator.newValueProxy(extracted, &I, extractedID, 0);
+ }
+
+ void GenWriter::emitExtractElement(ExtractElementInst &I) {
+ // TODO -> insert LOADI when the extracted vector is constant
+ }
+
+ void GenWriter::regAllocateShuffleVectorInst(ShuffleVectorInst &I) {
+ Value *first = I.getOperand(0);
+ Value *second = I.getOperand(1);
+ GBE_ASSERTM(!isa<Constant>(first) || isa<UndefValue>(first),
+ "TODO support constant vector for shuffle");
+ GBE_ASSERTM(!isa<Constant>(second) || isa<UndefValue>(second),
+ "TODO support constant vector for shuffle");
+ VectorType *dstType = cast<VectorType>(I.getType());
+ VectorType *srcType = cast<VectorType>(first->getType());
+ const uint32_t dstElemNum = dstType->getNumElements();
+ const uint32_t srcElemNum = srcType->getNumElements();
+ for (uint32_t elemID = 0; elemID < dstElemNum; ++elemID) {
+ uint32_t srcID = I.getMaskValue(elemID);
+ Value *src = first;
+ if (srcID >= srcElemNum) {
+ srcID -= srcElemNum;
+ src = second;
+ }
+ regTranslator.newValueProxy(src, &I, srcID, elemID);
+ }
+ }
+
+ void GenWriter::emitShuffleVectorInst(ShuffleVectorInst &I) {
+
+ }
+
+ void GenWriter::regAllocateSelectInst(SelectInst &I) {
+ this->newRegister(&I);
+ }
+
+ void GenWriter::emitSelectInst(SelectInst &I) {
+ // Get the element type for a vector
+ uint32_t elemNum;
+ const ir::Type type = getVectorInfo(ctx, I.getType(), &I, elemNum);
+
+ // Condition can be either a vector or a scalar
+ Type *condType = I.getOperand(0)->getType();
+ const bool isVectorCond = isa<VectorType>(condType);
+
+ // Emit the instructions in a row
+ for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+ const ir::Register dst = this->getRegister(&I, elemID);
+ const uint32_t condID = isVectorCond ? elemID : 0;
+ const ir::Register cond = this->getRegister(I.getOperand(0), condID);
+ const ir::Register src0 = this->getRegister(I.getOperand(1), elemID);
+ const ir::Register src1 = this->getRegister(I.getOperand(2), elemID);
+ ctx.SEL(type, dst, cond, src0, src1);
+ }
+ }
+
#ifndef NDEBUG
static bool isSupportedIntegerSize(IntegerType &T) {
return T.getBitWidth() == 8 || T.getBitWidth() == 16 ||
GBE_ASSERT(dummyKernel != NULL);
fclose(dummyKernel);
+ UTEST_EXPECT_SUCCESS(utestLLVM2Gen("select.ll"));
+#if 1
+ UTEST_EXPECT_SUCCESS(utestLLVM2Gen("shuffle.ll"));
+ UTEST_EXPECT_SUCCESS(utestLLVM2Gen("extract.ll"));
+ UTEST_EXPECT_SUCCESS(utestLLVM2Gen("insert.ll"));
UTEST_EXPECT_SUCCESS(utestLLVM2Gen("add.ll"));
UTEST_EXPECT_SUCCESS(utestLLVM2Gen("load_store.ll"));
UTEST_EXPECT_SUCCESS(utestLLVM2Gen("add2.ll"));
//UTEST_EXPECT_SUCCESS(utestLLVM2Gen("loop.ll"));
UTEST_EXPECT_SUCCESS(utestLLVM2Gen("void.ll"));
UTEST_EXPECT_SUCCESS(utestLLVM2Gen("cmp_cvt.ll"));
+#endif
}
UTEST_REGISTER(utestLLVM)