-clang -emit-llvm -O3 -ccc-host-triple ptx32 -c $1.cl -o $1.o
+clang -x cl -emit-llvm -O3 -ccc-host-triple ptx32 -c $1.cl -o $1.o
llvm-dis $1.o
mv $1.o.ll $1.ll
--- /dev/null
+__kernel void add(__global int *dst, unsigned int x)
+{
+ for (int i = 0; i < x; ++i) dst[i]++;
+}
+
--- /dev/null
+; ModuleID = 'loop.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @add(i32* nocapture %dst, i32 %x) nounwind noinline {
+entry:
+ %cmp2 = icmp eq i32 %x, 0
+ br i1 %cmp2, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.03 = phi i32 [ %inc1, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %dst, i32 %i.03
+ %0 = load i32* %arrayidx, align 4, !tbaa !1
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4, !tbaa !1
+ %inc1 = add nsw i32 %i.03, 1
+ %exitcond = icmp eq i32 %inc1, %x
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (i32*, i32)* @add}
+!1 = metadata !{metadata !"int", metadata !2}
+!2 = metadata !{metadata !"omnipotent char", metadata !3}
+!3 = metadata !{metadata !"Simple C/C++ TBAA", null}
--- /dev/null
+typedef float float4 __attribute__((ext_vector_type(4)));
+__attribute__((pure, overloadable)) int mad(int,int,int);
+__attribute__((pure, overloadable)) float mad(float,float,float);
+__attribute__((pure, overloadable)) float4 mad(float4,float4,float4);
+
+__kernel void add(__global int *dst, unsigned int x, float z)
+{
+ for (int i = 0; i < x; ++i) {
+ int y = mad(dst[i], 2, 3);
+ y = mad(dst[i], 2, 3);
+ float z = mad((float) dst[i], 2.f, 3.f);
+ float4 z0 = mad((float4) dst[i], (float4)(0.f,1.f,2.f,3.f), (float4)3.f);
+ dst[i] = y + (int) z + z0.x + z0.y + z0.z;
+ }
+}
+
--- /dev/null
+; ModuleID = 'mad.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @add(i32* nocapture %dst, i32 %x, float %z) nounwind noinline {
+entry:
+ %cmp16 = icmp eq i32 %x, 0
+ br i1 %cmp16, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.017 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %dst, i32 %i.017
+ %0 = load i32* %arrayidx, align 4, !tbaa !1
+ %call2 = tail call ptx_device i32 @_Z3madiii(i32 %0, i32 2, i32 3) nounwind readonly
+ %conv = sitofp i32 %0 to float
+ %call5 = tail call ptx_device float @_Z3madfff(float %conv, float 2.000000e+00, float 3.000000e+00) nounwind readonly
+ %1 = insertelement <4 x float> undef, float %conv, i32 0
+ %splat = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
+ %call8 = tail call ptx_device <4 x float> @_Z3madDv4_fS_S_(<4 x float> %splat, <4 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x float> <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>) nounwind readonly
+ %conv9 = fptosi float %call5 to i32
+ %add = add nsw i32 %conv9, %call2
+ %conv10 = sitofp i32 %add to float
+ %2 = extractelement <4 x float> %call8, i32 0
+ %add11 = fadd float %conv10, %2
+ %3 = extractelement <4 x float> %call8, i32 1
+ %add12 = fadd float %add11, %3
+ %4 = extractelement <4 x float> %call8, i32 2
+ %add13 = fadd float %add12, %4
+ %conv14 = fptosi float %add13 to i32
+ store i32 %conv14, i32* %arrayidx, align 4, !tbaa !1
+ %inc = add nsw i32 %i.017, 1
+ %exitcond = icmp eq i32 %inc, %x
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare ptx_device i32 @_Z3madiii(i32, i32, i32) nounwind readonly
+
+declare ptx_device float @_Z3madfff(float, float, float) nounwind readonly
+
+declare ptx_device <4 x float> @_Z3madDv4_fS_S_(<4 x float>, <4 x float>, <4 x float>) nounwind readonly
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (i32*, i32, float)* @add}
+!1 = metadata !{metadata !"int", metadata !2}
+!2 = metadata !{metadata !"omnipotent char", metadata !3}
+!3 = metadata !{metadata !"Simple C/C++ TBAA", null}
const std::string &getName(void) const { return name; }
/*! Extract the register from the register file */
INLINE RegisterData getRegisterData(Register ID) const { return file.get(ID); }
+ /*! Get the register family from the register itself */
+ INLINE RegisterData::Family getRegisterFamiy(Register ID) const {
+ return this->getRegisterData(ID).family;
+ }
/*! Get the register index from the tuple vector */
INLINE Register getRegister(Tuple ID, uint32_t which) const {
return file.get(ID, which);
public BasePolicy, public NoDstPolicy
{
public:
- INLINE BranchInstruction(LabelIndex labelIndex, Register predicate) {
- this->opcode = OP_BRA;
+ INLINE BranchInstruction(Opcode op, LabelIndex labelIndex, Register predicate) {
+ GBE_ASSERT(op == OP_BRA);
+ this->opcode = op;
this->predicate = predicate;
this->labelIndex = labelIndex;
this->hasPredicate = true;
+ this->hasLabel = true;
}
- INLINE BranchInstruction(LabelIndex labelIndex) {
+ INLINE BranchInstruction(Opcode op, LabelIndex labelIndex) {
+ GBE_ASSERT(op == OP_BRA);
this->opcode = OP_BRA;
this->labelIndex = labelIndex;
this->hasPredicate = false;
+ this->hasLabel = true;
+ }
+ INLINE BranchInstruction(Opcode op) {
+ GBE_ASSERT(op == OP_RET);
+ this->opcode = OP_RET;
+ this->hasPredicate = false;
+ this->hasLabel = false;
+ }
+ INLINE LabelIndex getLabelIndex(void) const {
+ GBE_ASSERTM(hasLabel, "No target label for this branch instruction");
+ return labelIndex;
}
- INLINE LabelIndex getLabelIndex(void) const { return labelIndex; }
INLINE uint32_t getSrcNum(void) const { return hasPredicate ? 1 : 0; }
INLINE Register getSrcIndex(const Function &fn, uint32_t ID) const {
GBE_ASSERTM(hasPredicate, "No source for unpredicated branches");
INLINE void out(std::ostream &out, const Function &fn) const;
Register predicate; //!< Predication means conditional branch
LabelIndex labelIndex; //!< Index of the label the branch targets
- bool hasPredicate; //!< Is it predicated?
+ bool hasPredicate:1; //!< Is it predicated?
+ bool hasLabel:1; //!< Is there any target label?
};
class ALIGNED_INSTRUCTION LoadInstruction :
// The label must exist and the register must of boolean family
INLINE bool BranchInstruction::wellFormed(const Function &fn, std::string &whyNot) const
{
- if (UNLIKELY(labelIndex >= fn.labelNum())) {
- whyNot = "Out-of-bound label index";
- return false;
- }
+ if (hasLabel)
+ if (UNLIKELY(labelIndex >= fn.labelNum())) {
+ whyNot = "Out-of-bound label index";
+ return false;
+ }
if (hasPredicate)
if (UNLIKELY(checkRegisterData(RegisterData::BOOL, predicate, fn, whyNot) == false))
return false;
this->outOpcode(out);
if (hasPredicate)
out << "<%" << this->getSrcIndex(fn, 0) << ">";
- out << " -> label$" << labelIndex;
+ if (hasLabel) out << " -> label$" << labelIndex;
}
INLINE void LoadImmInstruction::out(std::ostream &out, const Function &fn) const {
// BRA
Instruction BRA(LabelIndex labelIndex) {
- const internal::BranchInstruction insn(labelIndex);
+ const internal::BranchInstruction insn(OP_BRA, labelIndex);
return insn.convert();
}
Instruction BRA(LabelIndex labelIndex, Register pred) {
- const internal::BranchInstruction insn(labelIndex, pred);
+ const internal::BranchInstruction insn(OP_BRA, labelIndex, pred);
+ return insn.convert();
+ }
+
+ // RET
+ Instruction RET(void) {
+ const internal::BranchInstruction insn(OP_RET);
return insn.convert();
}
Instruction BRA(LabelIndex labelIndex);
/*! (pred) bra labelIndex */
Instruction BRA(LabelIndex labelIndex, Register pred);
+ /*! ret */
+ Instruction RET(void);
/*! loadi.type dst value */
Instruction LOADI(Type type, Register dst, ImmediateIndex value);
/*! load.type.space {dst1,...,dst_valueNum} offset value */
DECL_INSN(GT, CompareInstruction)
DECL_INSN(CVT, ConvertInstruction)
DECL_INSN(BRA, BranchInstruction)
+DECL_INSN(RET, BranchInstruction)
DECL_INSN(TEX, TextureInstruction)
DECL_INSN(LOADI, LoadImmInstruction)
DECL_INSN(LOAD, LoadInstruction)
return RegisterData::DWORD;
}
+ /*! Return a type for each register family */
+ INLINE Type getType(RegisterData::Family family) {
+ switch (family) {
+ case RegisterData::BOOL: return TYPE_BOOL;
+ case RegisterData::BYTE: return TYPE_U8;
+ case RegisterData::WORD: return TYPE_U16;
+ case RegisterData::DWORD: return TYPE_U32;
+ case RegisterData::QWORD: return TYPE_U64;
+ };
+ return TYPE_U32;
+ }
+
} /* namespace ir */
} /* namespace gbe */
#include "ir/context.hpp"
#include "ir/unit.hpp"
-#include "sys/hash_map.hpp"
+#include "sys/map.hpp"
#include <algorithm>
using namespace llvm;
MCContext *TCtx;
const TargetData* TD;
- /*! Map value name to ir::Register*/
- hash_map<std::string, ir::Register> registerMap;
+ /*! Map value to ir::Register*/
+ map<const Value*, ir::Register> registerMap;
+
+ /*! Map value to ir::LabelIndex */
+ map<const Value*, ir::LabelIndex> labelMap;
std::map<const ConstantFP *, unsigned> FPConstantMap;
std::set<Function*> intrinsicPrototypesAlreadyGenerated;
void printFloatingPointConstants(const Constant *C);
void emitFunctionSignature(const Function *F, bool Prototype);
+ /*! Emit the complete function code and declaration */
+ void emitFunction(Function &F);
/*! Handle input and output function parameters */
void emitFunctionPrototype(const Function *F);
+ /*! Emit the code for a basic block */
+ void emitBasicBlock(BasicBlock *BB);
/*! Get the register family from the given type */
INLINE ir::RegisterData::Family getArgumentFamily(const Type*) const;
+ /*! Insert a new register when this is a scalar value */
+ INLINE void newRegister(const Value *value);
+ /*! Return a valid register from an operand (can use LOADI to make one) */
+ INLINE ir::Register getRegister(Value *value);
+ /*! Insert a new label index when this is a scalar value */
+ INLINE void newLabelIndex(const Value *value);
+ /*! int / float / double / bool are scalars */
+ INLINE bool isScalarType(const Type *type) const;
+ /*! Get the Gen IR type from the LLVM type */
+ INLINE ir::Type getType(const Type *type) const;
- void emitFunction(Function &);
void printBasicBlock(BasicBlock *BB);
void printLoop(Loop *L);
};
char GenWriter::ID = 0;
+#define PRINT_CODE 1
static std::string CBEMangle(const std::string &S) {
std::string Result;
}
}
- INLINE ir::RegisterData::Family GenWriter::getArgumentFamily(const Type *type) const
+ INLINE bool GenWriter::isScalarType(const Type *type) const
{
- GBE_ASSERT(type->isFloatTy() ||
- type->isIntegerTy() ||
- type->isDoubleTy() ||
- type->isPointerTy());
+ return type->isFloatTy() ||
+ type->isIntegerTy() ||
+ type->isDoubleTy() ||
+ type->isPointerTy();
+ }
+ INLINE ir::Type GenWriter::getType(const Type *type) const
+ {
+ GBE_ASSERT(this->isScalarType(type));
+ if (type->isFloatTy() == true)
+ return ir::TYPE_FLOAT;
+ if (type->isDoubleTy() == true)
+ return ir::TYPE_DOUBLE;
+ if (type->isPointerTy() == true) {
+ if (ctx.getPointerSize() == ir::POINTER_32_BITS)
+ return ir::TYPE_U32;
+ else
+ return ir::TYPE_U64;
+ }
+ GBE_ASSERT(type->isIntegerTy() == true);
+ if (type == Type::getInt1Ty(type->getContext()))
+ return ir::TYPE_BOOL;
+ if (type == Type::getInt8Ty(type->getContext()))
+ return ir::TYPE_S8;
+ if (type == Type::getInt16Ty(type->getContext()))
+ return ir::TYPE_S16;
+ if (type == Type::getInt32Ty(type->getContext()))
+ return ir::TYPE_S32;
+ if (type == Type::getInt64Ty(type->getContext()))
+ return ir::TYPE_S64;
+ GBE_ASSERT(0);
+ return ir::TYPE_S64;
+ }
+
+ INLINE ir::RegisterData::Family GenWriter::getArgumentFamily(const Type *type) const
+ {
+ GBE_ASSERT(this->isScalarType(type) == true);
if (type == Type::getInt1Ty(type->getContext()))
return ir::RegisterData::BOOL;
if (type == Type::getInt8Ty(type->getContext()))
return ir::RegisterData::BOOL;
}
+ void GenWriter::newRegister(const Value *value) {
+ if (registerMap.find(value) == registerMap.end()) {
+ const Type *type = value->getType();
+ const ir::RegisterData::Family family = getArgumentFamily(type);
+ const ir::Register reg = ctx.reg(family);
+ ctx.input(reg);
+ registerMap[value] = reg;
+ }
+ }
+
+ ir::Register GenWriter::getRegister(Value *value) {
+ Constant *CPV = dyn_cast<Constant>(value);
+ if (CPV && !isa<GlobalValue>(CPV)) {
+ GBE_ASSERT(0);
+ // printConstant(CPV, Static);
+ } else {
+ GBE_ASSERT(this->registerMap.find(value) != this->registerMap.end());
+ return this->registerMap[value];
+ }
+ }
+
+ void GenWriter::newLabelIndex(const Value *value) {
+ if (labelMap.find(value) == labelMap.end()) {
+ const ir::LabelIndex label = ctx.label();
+ labelMap[value] = label;
+ }
+ }
+
+ void GenWriter::emitBasicBlock(BasicBlock *BB) {
+ GBE_ASSERT(labelMap.find(BB) != labelMap.end());
+ ctx.LABEL(labelMap[BB]);
+ for (auto II = BB->begin(), E = BB->end(); II != E; ++II) {
+ const Type *Ty = II->getType();
+ GBE_ASSERT(!Ty->isIntegerTy() ||
+ (Ty==Type::getInt1Ty(II->getContext()) ||
+ Ty==Type::getInt8Ty(II->getContext()) ||
+ Ty==Type::getInt16Ty(II->getContext()) ||
+ Ty==Type::getInt32Ty(II->getContext()) ||
+ Ty==Type::getInt64Ty(II->getContext())));
+ visit(*II);
+ }
+ }
+
void GenWriter::emitFunctionPrototype(const Function *F)
{
const bool returnStruct = F->hasStructRetAttr();
fn.setStructReturned(true);
}
- std::string ArgName;
- for (; I != E; ++I) {
- ArgName = GetValueName(I);
-
- // Insert a new register if we need to
- if (registerMap.find(ArgName) == registerMap.end()) {
- const Type *type = I->getType();
- const ir::RegisterData::Family family = getArgumentFamily(type);
- const ir::Register reg = ctx.reg(family);
- ctx.input(reg);
- registerMap[ArgName] = reg;
- }
- }
+ // Insert a new register if we need to
+ for (; I != E; ++I) this->newRegister(I);
}
// When returning a structure, first input register is the pointer to the
// structure
if (!returnStruct) {
const Type *type = F->getReturnType();
- const ir::RegisterData::Family family = getArgumentFamily(type);
- const ir::Register reg = ctx.reg(family);
- ctx.output(reg);
+ if (type->isVoidTy() == false) {
+ const ir::RegisterData::Family family = getArgumentFamily(type);
+ const ir::Register reg = ctx.reg(family);
+ ctx.output(reg);
+ }
}
#if GBE_DEBUG
{
ctx.startFunction(GetValueName(&F));
this->registerMap.clear();
+ this->labelMap.clear();
this->emitFunctionPrototype(&F);
+ // We create all the register variables
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I)
+ if (I->getType() != Type::getVoidTy(F.getContext()))
+ this->newRegister(&*I);
+
+ // First create all the labels (one per block)
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ this->newLabelIndex(BB);
+
+ // ... then, emit the code for all basic blocks
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ emitBasicBlock(BB);
+#if 0
/// isStructReturn - Should this function actually return a struct by-value?
bool isStructReturn = F.hasStructRetAttr();
// print local variable information for the function
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
if (const AllocaInst *AI = isDirectAlloca(&*I)) {
+ GBE_ASSERT(0);
Out << " ";
printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
Out << "; /* Address-exposed local */\n";
}
Out << "}\n\n";
-
+#endif
ctx.endFunction();
}
//
void GenWriter::visitReturnInst(ReturnInst &I) {
// If this is a struct return function, return the temporary struct.
- bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr();
+ const ir::Function &fn = ctx.getFunction();
+ GBE_ASSERTM(fn.outputNum() <= 1, "no more than one value can be returned");
+ if (fn.outputNum() == 1 && I.getNumOperands() > 0) {
+ const ir::Register dst = fn.getOutput(0);
+ const ir::Register src = this->getRegister(I.getOperand(0));
+ const ir::RegisterData::Family family = fn.getRegisterFamiy(dst);;
+ ctx.MOV(ir::getType(family), dst, src);
+ }
+ ctx.RET();
+ bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr();
if (isStructReturn) {
Out << " return StructReturn;\n";
return;
!I.getParent()->size() == 1) {
return;
}
-
+#if 0
Out << " return";
if (I.getNumOperands()) {
Out << ' ';
writeOperand(I.getOperand(0));
}
Out << ";\n";
+#endif
}
void GenWriter::visitSwitchInst(SwitchInst &SI) {
}
- void GenWriter::visitBinaryOperator(Instruction &I) {
+ void GenWriter::visitBinaryOperator(Instruction &I)
+ {
+ GBE_ASSERT(!I.getType()->isPointerTy());
+ GBE_ASSERT(this->registerMap.find(&I) != this->registerMap.end());
+ const ir::Register dst = this->registerMap[&I];
+ const ir::Register src0 = this->getRegister(I.getOperand(0));
+ const ir::Register src1 = this->getRegister(I.getOperand(1));
+ const ir::Type type = this->getType(I.getType());
+
+ switch (I.getOpcode()) {
+ case Instruction::Add:
+ case Instruction::FAdd: ctx.ADD(type, dst, src0, src1); break;
+ case Instruction::Sub:
+ case Instruction::FSub: ctx.SUB(type, dst, src0, src1); break;
+ case Instruction::Mul:
+ case Instruction::FMul: ctx.MUL(type, dst, src0, src1); break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: ctx.REM(type, dst, src0, src1); break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: ctx.DIV(type, dst, src0, src1); break;
+ case Instruction::And: ctx.AND(type, dst, src0, src1); break;
+ case Instruction::Or: ctx.OR(type, dst, src0, src1); break;
+ case Instruction::Xor: ctx.XOR(type, dst, src0, src1); break;
+ case Instruction::Shl : ctx.SHL(type, dst, src0, src1); break;
+ case Instruction::LShr: ctx.SHR(type, dst, src0, src1); break;
+ case Instruction::AShr: ctx.ASR(type, dst, src0, src1); break;
+ default:
+ GBE_ASSERT(0);
+ };
+
+#if 0
// binary instructions, shift instructions, setCond instructions.
assert(!I.getType()->isPointerTy());
-
// We must cast the results of binary operations which might be promoted.
bool needsCast = false;
if ((I.getType() == Type::getInt8Ty(I.getContext())) ||
if (needsCast) {
Out << "))";
}
+#endif
}
void GenWriter::visitICmpInst(ICmpInst &I) {
void GenWriter::visitInsertValueInst(InsertValueInst &IVI) {
// Start by copying the entire aggregate value into the result variable.
- writeOperand(IVI.getOperand(0));
+ writeOperand(IVI.getOperand(0));
Out << ";\n ";
// Then do the insert to update the field.
GBE_ASSERT(dummyKernel != NULL);
fclose(dummyKernel);
- //UTEST_EXPECT_SUCCESS(utestLLVM2Gen("add.ll"));
- UTEST_EXPECT_SUCCESS(utestLLVM2Gen("add2.ll"));
+ UTEST_EXPECT_SUCCESS(utestLLVM2Gen("add.ll"));
+ //UTEST_EXPECT_SUCCESS(utestLLVM2Gen("add2.ll"));
+ //UTEST_EXPECT_SUCCESS(utestLLVM2Gen("loop.ll"));
//UTEST_EXPECT_SUCCESS(utestLLVM2Gen("void.ll"));
}