double fmod(double, double);
float fmodf(float, float);
long double fmodl(long double, long double);
-void test_global_id(unsigned int *llvm_cbe_dst);
-unsigned int __gen_get_global_id0(void);
+void test_global_id(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_p);
+unsigned int __gen_ocl_get_global_id0(void);
+unsigned int __gen_ocl_get_local_id0(void);
void abort(void);
static inline int llvm_fcmp_ole(double X, double Y) { return X <= Y ; }
static inline int llvm_fcmp_oge(double X, double Y) { return X >= Y ; }
-void test_global_id(unsigned int *llvm_cbe_dst) {
+void test_global_id(unsigned int *llvm_cbe_dst, unsigned int *llvm_cbe_p) {
unsigned int llvm_cbe_call_2e_i;
+ unsigned int llvm_cbe_call_2e_i6;
- llvm_cbe_call_2e_i = /*tail*/ __gen_get_global_id0();
- *((&llvm_cbe_dst[((signed int )llvm_cbe_call_2e_i)])) = 1u;
+ llvm_cbe_call_2e_i = /*tail*/ __gen_ocl_get_local_id0();
+ llvm_cbe_call_2e_i6 = /*tail*/ __gen_ocl_get_global_id0();
+ *((&llvm_cbe_dst[((signed int )llvm_cbe_call_2e_i6)])) = (((signed int )(((signed int )(llvm_cbe_call_2e_i << 16u)) >> ((signed int )16u))));
+ *((&llvm_cbe_p[((signed int )llvm_cbe_call_2e_i6)])) = llvm_cbe_call_2e_i;
return;
}
-__attribute__((pure)) unsigned int __gen_get_global_id0(void);
-__attribute__((pure)) unsigned int __gen_get_global_id1(void);
-__attribute__((pure)) unsigned int __gen_get_global_id2(void);
+__attribute__((pure,const)) unsigned int __gen_ocl_get_global_id0(void);
+__attribute__((pure,const)) unsigned int __gen_ocl_get_global_id1(void);
+__attribute__((pure,const)) unsigned int __gen_ocl_get_global_id2(void);
+__attribute__((pure,const)) unsigned int __gen_ocl_get_local_id0(void);
+__attribute__((pure,const)) unsigned int __gen_ocl_get_local_id1(void);
+__attribute__((pure,const)) unsigned int __gen_ocl_get_local_id2(void);
inline unsigned get_global_id(unsigned int dim) {
- if (dim == 0) return __gen_get_global_id0();
- else if (dim == 1) return __gen_get_global_id1();
- else if (dim == 2) return __gen_get_global_id2();
+ if (dim == 0) return __gen_ocl_get_global_id0();
+ else if (dim == 1) return __gen_ocl_get_global_id1();
+ else if (dim == 2) return __gen_ocl_get_global_id2();
else return 0;
}
-__kernel void test_global_id(__global int *dst)
+inline unsigned get_local_id(unsigned int dim) {
+ if (dim == 0) return __gen_ocl_get_local_id0();
+ else if (dim == 1) return __gen_ocl_get_local_id1();
+ else if (dim == 2) return __gen_ocl_get_local_id2();
+ else return 0;
+}
+
+__kernel void test_global_id(__global int *dst, __global int *p)
{
- short hop = get_global_id(0);
+ short hop = get_local_id(0);
dst[get_global_id(0)] = hop;
+ p[get_global_id(0)] = get_local_id(0);
}
target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
target triple = "ptx32--"
-define ptx_kernel void @test_global_id(i32* nocapture %dst) nounwind noinline {
-get_global_id.exit5:
- %call.i = tail call ptx_device i32 @__gen_get_global_id0() nounwind readonly
+define ptx_kernel void @test_global_id(i32* nocapture %dst, i32* nocapture %p) nounwind noinline {
+get_global_id.exit13:
+ %call.i = tail call ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone
%sext = shl i32 %call.i, 16
%conv1 = ashr exact i32 %sext, 16
- %arrayidx = getelementptr inbounds i32* %dst, i32 %call.i
+ %call.i6 = tail call ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone
+ %arrayidx = getelementptr inbounds i32* %dst, i32 %call.i6
store i32 %conv1, i32* %arrayidx, align 4, !tbaa !1
+ %arrayidx5 = getelementptr inbounds i32* %p, i32 %call.i6
+ store i32 %call.i, i32* %arrayidx5, align 4, !tbaa !1
ret void
}
-declare ptx_device i32 @__gen_get_global_id0() nounwind readonly
+declare ptx_device i32 @__gen_ocl_get_global_id0() nounwind readnone
+
+declare ptx_device i32 @__gen_ocl_get_local_id0() nounwind readnone
!opencl.kernels = !{!0}
-!0 = metadata !{void (i32*)* @test_global_id}
+!0 = metadata !{void (i32*, i32*)* @test_global_id}
!1 = metadata !{metadata !"int", metadata !2}
!2 = metadata !{metadata !"omnipotent char", metadata !3}
!3 = metadata !{metadata !"Simple C/C++ TBAA", null}
__local int array[256];
for (int i = 0; i < 256; ++i)
array[i] = i;
- mem[0] = array[x] + g[x];
+ mem[0] = s.a + array[x] + g[x];
}
br label %for.body
for.body: ; preds = %for.body, %entry
- %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds [256 x i32] addrspace(4)* @struct_cl.array, i32 0, i32 %i.04
- store i32 %i.04, i32 addrspace(4)* %arrayidx, align 4, !tbaa !1
- %inc = add nsw i32 %i.04, 1
+ %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [256 x i32] addrspace(4)* @struct_cl.array, i32 0, i32 %i.05
+ store i32 %i.05, i32 addrspace(4)* %arrayidx, align 4, !tbaa !1
+ %inc = add nsw i32 %i.05, 1
%exitcond = icmp eq i32 %inc, 256
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
+ %a = getelementptr inbounds %struct.my_struct* %s, i32 0, i32 0
+ %0 = load i32* %a, align 4, !tbaa !1
%arrayidx1 = getelementptr inbounds [256 x i32] addrspace(4)* @struct_cl.array, i32 0, i32 %x
- %0 = load i32 addrspace(4)* %arrayidx1, align 4, !tbaa !1
+ %1 = load i32 addrspace(4)* %arrayidx1, align 4, !tbaa !1
%arrayidx2 = getelementptr inbounds [4 x i32] addrspace(1)* @g, i32 0, i32 %x
- %1 = load i32 addrspace(1)* %arrayidx2, align 4, !tbaa !1
- %add = add nsw i32 %1, %0
- store i32 %add, i32* %mem, align 4, !tbaa !1
+ %2 = load i32 addrspace(1)* %arrayidx2, align 4, !tbaa !1
+ %add = add i32 %1, %0
+ %add3 = add i32 %add, %2
+ store i32 %add3, i32* %mem, align 4, !tbaa !1
ret void
}
--- /dev/null
+__kernel void undefined(__global int *dst)
+{
+ int x;
+ if (x == 0)
+ dst[0] = 0;
+ else
+ dst[0] = 1;
+}
+
--- /dev/null
+; ModuleID = 'undefined.o'
+target datalayout = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"
+target triple = "ptx32--"
+
+define ptx_kernel void @undefined(i32* %dst) nounwind noinline {
+entry:
+ %dst.addr = alloca i32*, align 4
+ %x = alloca i32, align 4
+ store i32* %dst, i32** %dst.addr, align 4
+ %0 = load i32* %x, align 4
+ %cmp = icmp eq i32 %0, 0
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %1 = load i32** %dst.addr, align 4
+ %arrayidx = getelementptr inbounds i32* %1, i32 0
+ store i32 0, i32* %arrayidx
+ br label %if.end
+
+if.else: ; preds = %entry
+ %2 = load i32** %dst.addr, align 4
+ %arrayidx1 = getelementptr inbounds i32* %2, i32 0
+ store i32 1, i32* %arrayidx1
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ ret void
+}
+
+!opencl.kernels = !{!0}
+
+!0 = metadata !{void (i32*)* @undefined}
void output(Register reg);
/*! Get the current processed function */
Function &getFunction(void);
+ /*! Get the current processed unit */
+ INLINE Unit &getUnit(void) { return unit; }
/*! Append a new tuple */
template <typename... Args> INLINE Tuple tuple(Args...args) {
GBE_ASSERTM(fn != NULL, "No function currently defined");
{
static void init(Function &fn) {
IF_DEBUG(Register r);
- IF_DEBUG(r = ) fn.newRegister(RegisterData::WORD);
+ IF_DEBUG(r =) fn.newRegister(RegisterData::DWORD);
GBE_ASSERT(r == lid0);
- IF_DEBUG(r = ) fn.newRegister(RegisterData::WORD);
+ IF_DEBUG(r =) fn.newRegister(RegisterData::DWORD);
GBE_ASSERT(r == lid1);
- IF_DEBUG(r = ) fn.newRegister(RegisterData::WORD);
+ IF_DEBUG(r =) fn.newRegister(RegisterData::DWORD);
GBE_ASSERT(r == lid2);
- IF_DEBUG(r = ) fn.newRegister(RegisterData::DWORD);
+ IF_DEBUG(r =) fn.newRegister(RegisterData::DWORD);
GBE_ASSERT(r == gid0);
- IF_DEBUG(r = ) fn.newRegister(RegisterData::DWORD);
+ IF_DEBUG(r =) fn.newRegister(RegisterData::DWORD);
GBE_ASSERT(r == gid1);
- IF_DEBUG(r = ) fn.newRegister(RegisterData::DWORD);
+ IF_DEBUG(r =) fn.newRegister(RegisterData::DWORD);
GBE_ASSERT(r == gid2);
}
} /* namespace ocl */
* use the same pointer size as the unit they belong to
*/
enum PointerSize {
- POINTER_32_BITS = 0,
- POINTER_64_BITS = 1
+ POINTER_32_BITS = 32,
+ POINTER_64_BITS = 64
};
/*! Complete unit of compilation. It contains a set of functions and a set of
include (${LLVM_DIR}/LLVMProcessSources.cmake)
include_directories(${LLVM_INCLUDE_DIRS})
include_directories(../)
-add_llvm_target(GenBackend llvm_to_gen.cpp llvm_gen_backend.cpp)
+add_llvm_target(GenBackend
+ llvm_to_gen.cpp
+ llvm_gen_backend.cpp
+ llvm_passes.cpp)
add_llvm_library_dependencies(LLVMGenBackend
LLVMAnalysis
LLVMCodeGen
#include "llvm/Support/SourceMgr.h"
#include "llvm/Config/config.h"
+#include "llvm/llvm_gen_backend.hpp"
#include "ir/context.hpp"
#include "ir/unit.hpp"
#include "sys/map.hpp"
LI = &getAnalysis<LoopInfo>();
- // Get rid of intrinsics we can't handle.
- lowerIntrinsics(F);
-
// Output all floating point constants that cannot be printed accurately.
printFloatingPointConstants(F);
private :
- void lowerIntrinsics(Function &F);
/// Prints the definition of the intrinsic function F. Supports the
/// intrinsics which need to be explicitly defined in the CBackend.
void printIntrinsicDefinition(const Function &F, raw_ostream &Out);
/*! Get the register family from the given type */
INLINE ir::RegisterData::Family getArgumentFamily(const Type*) const;
/*! Insert a new register when this is a scalar value */
- INLINE void newRegister(const Value *value);
+ INLINE ir::Register newRegister(const Value *value);
/*! Return a valid register from an operand (can use LOADI to make one) */
INLINE ir::Register getRegister(Value *value);
/*! Return a valid register for a constant value */
// Instruction visitation functions
friend class InstVisitor<GenWriter>;
+ // Currently supported instructions
+ void visitBinaryOperator(Instruction &I);
void visitReturnInst(ReturnInst &I);
- void visitBranchInst(BranchInst &I);
-
- void visitVAArgInst(VAArgInst &I) {GBE_ASSERTM(false, "Not supported");}
- void visitSwitchInst(SwitchInst &I) {GBE_ASSERTM(false, "Not supported");}
- void visitInvokeInst(InvokeInst &I) {GBE_ASSERTM(false, "Not supported");}
- void visitUnwindInst(UnwindInst &I) {GBE_ASSERTM(false, "Not supported");}
- void visitResumeInst(ResumeInst &I) {GBE_ASSERTM(false, "Not supported");}
- void visitInlineAsm(CallInst &I) {GBE_ASSERTM(false, "Not supported");}
- void visitIndirectBrInst(IndirectBrInst &I) {GBE_ASSERTM(false, "Not supported");}
- void visitUnreachableInst(UnreachableInst &I) {GBE_ASSERTM(false, "Not supported");}
+ void visitLoadInst(LoadInst &I);
+ void visitStoreInst(StoreInst &I);
+ void visitCallInst (CallInst &I);
+ bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee);
+ // Must be implemented later
+ void visitInsertElementInst(InsertElementInst &I) {NOT_SUPPORTED;}
+ void visitExtractElementInst(ExtractElementInst &I) {NOT_SUPPORTED;}
+ void visitShuffleVectorInst(ShuffleVectorInst &SVI) {NOT_SUPPORTED;}
+ void visitInsertValueInst(InsertValueInst &I) {NOT_SUPPORTED;}
+ void visitExtractValueInst(ExtractValueInst &I) {NOT_SUPPORTED;}
+ void visitPHINode(PHINode &I) {NOT_SUPPORTED;}
+ void visitBranchInst(BranchInst &I) {NOT_SUPPORTED;}
+ void visitICmpInst(ICmpInst &I) {NOT_SUPPORTED;}
+ void visitFCmpInst(FCmpInst &I) {NOT_SUPPORTED;}
+ void visitCastInst (CastInst &I);
+ void visitSelectInst(SelectInst &I) {NOT_SUPPORTED;}
- void visitPHINode(PHINode &I);
- void visitBinaryOperator(Instruction &I);
- void visitICmpInst(ICmpInst &I);
- void visitFCmpInst(FCmpInst &I);
+ // These instructions are not supported at all
+ void visitVAArgInst(VAArgInst &I) {NOT_SUPPORTED;}
+ void visitSwitchInst(SwitchInst &I) {NOT_SUPPORTED;}
+ void visitInvokeInst(InvokeInst &I) {NOT_SUPPORTED;}
+ void visitUnwindInst(UnwindInst &I) {NOT_SUPPORTED;}
+ void visitResumeInst(ResumeInst &I) {NOT_SUPPORTED;}
+ void visitInlineAsm(CallInst &I) {NOT_SUPPORTED;}
+ void visitIndirectBrInst(IndirectBrInst &I) {NOT_SUPPORTED;}
+ void visitUnreachableInst(UnreachableInst &I) {NOT_SUPPORTED;}
+ void visitGetElementPtrInst(GetElementPtrInst &I) {NOT_SUPPORTED;}
- void visitCastInst (CastInst &I);
- void visitSelectInst(SelectInst &I);
- void visitCallInst (CallInst &I);
- bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee);
void visitAllocaInst(AllocaInst &I);
template <bool isLoad, typename T> void visitLoadOrStore(T &I);
- void visitLoadInst (LoadInst &I);
- void visitStoreInst (StoreInst &I);
- void visitGetElementPtrInst(GetElementPtrInst &I);
- void visitInsertElementInst(InsertElementInst &I);
- void visitExtractElementInst(ExtractElementInst &I);
- void visitShuffleVectorInst(ShuffleVectorInst &SVI);
-
- void visitInsertValueInst(InsertValueInst &I);
- void visitExtractValueInst(ExtractValueInst &I);
void visitInstruction(Instruction &I) {
#ifndef NDEBUG
BasicBlock *Successor, unsigned Indent);
void printBranchToBlock(BasicBlock *CurBlock, BasicBlock *SuccBlock,
unsigned Indent);
- void printGEPExpression(Value *Ptr, gep_type_iterator I,
- gep_type_iterator E, bool Static);
std::string GetValueName(const Value *Operand);
};
case Instruction::GetElementPtr:
Out << "(";
- printGEPExpression(CE->getOperand(0), gep_type_begin(CPV),
- gep_type_end(CPV), Static);
+ //printGEPExpression(CE->getOperand(0), gep_type_begin(CPV),
+ // gep_type_end(CPV), Static);
Out << ")";
return;
case Instruction::Select:
return ir::RegisterData::BOOL;
}
- void GenWriter::newRegister(const Value *value) {
- if (registerMap.find(value) == registerMap.end()) {
+ ir::Register GenWriter::newRegister(const Value *value) {
+ auto it = registerMap.find(value);
+ if (it == registerMap.end()) {
const Type *type = value->getType();
const ir::RegisterData::Family family = getArgumentFamily(type);
const ir::Register reg = ctx.reg(family);
- ctx.input(reg);
registerMap[value] = reg;
- }
+ return reg;
+ } else
+ return it->second;
}
ir::Register GenWriter::getConstantRegister(Constant *CPV) {
}
// Insert a new register if we need to
- for (; I != E; ++I) this->newRegister(I);
+ for (; I != E; ++I) {
+ const ir::Register reg = this->newRegister(I);
+ ctx.input(reg);
+ }
}
// When returning a structure, first input register is the pointer to the
#endif
}
+
bool GenWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) {
/// FIXME: This should be reenabled, but loop reordering safe!!
return true;
return true;
return false;
}
-
+#if 0
void GenWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock,
BasicBlock *Successor,
unsigned Indent) {
// Branch instruction printing - Avoid printing out a branch to a basic block
// that immediately succeeds the current one.
//
- void GenWriter::visitBranchInst(BranchInst &I) {
+ void GenWriter::visitBranchInst(BranchInst &I)
+ {
if (I.isConditional()) {
if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) {
// PHI nodes get copied into temporary values at the end of predecessor basic
// blocks. We now need to copy these temporary values into the REAL value for
// the PHI.
+
void GenWriter::visitPHINode(PHINode &I) {
+ NOT_SUPPORTED;
writeOperand(&I);
Out << "__PHI_TEMPORARY";
}
-
+#endif
void GenWriter::visitBinaryOperator(Instruction &I)
{
#endif
}
+#if 0
void GenWriter::visitICmpInst(ICmpInst &I) {
// We must cast the results of icmp which might be promoted.
bool needsCast = false;
writeOperand(I.getOperand(1));
Out << ")";
}
-
+#endif
+#if 0
static const char * getFloatBitCastField(Type *Ty) {
switch (Ty->getTypeID()) {
default: llvm_unreachable("Invalid Type");
}
}
}
+#endif
+#if 1
void GenWriter::visitCastInst(CastInst &I) {
+ if (I.getOpcode() == Instruction::PtrToInt ||
+ I.getOpcode() == Instruction::IntToPtr) {
+ Value *srcValue = &I, *dstValue = I.getOperand(0);
+ Type *dstType = dstValue->getType();
+ Type *srcType = srcValue->getType();
+ const ir::Unit &unit = ctx.getUnit();
+ GBE_ASSERT(getTypeByteSize(unit, dstType) == getTypeByteSize(unit, srcType));
+ GBE_ASSERT(registerMap.find(dstValue) != registerMap.end());
+ registerMap[dstValue] = registerMap[srcValue];
+ } else
+ NOT_SUPPORTED;
+#if 0
Type *DstTy = I.getType();
Type *SrcTy = I.getOperand(0)->getType();
if (isFPIntBitCast(I)) {
Out << "&1u";
}
Out << ')';
+#endif
}
+#endif
+#if 0
void GenWriter::visitSelectInst(SelectInst &I) {
Out << "((";
writeOperand(I.getCondition());
writeOperand(I.getFalseValue());
Out << "))";
}
+#endif
#ifndef NDEBUG
static bool isSupportedIntegerSize(IntegerType &T) {
#endif
}
- void GenWriter::lowerIntrinsics(Function &F) {
- // This is used to keep track of intrinsics that get generated to a lowered
- // function. We must generate the prototypes before the function body which
- // will only be expanded on first use (by the loop below).
- std::vector<Function*> prototypesToGen;
-
- // Examine all the instructions in this function to find the intrinsics that
- // need to be lowered.
- for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
- if (CallInst *CI = dyn_cast<CallInst>(I++))
- if (Function *F = CI->getCalledFunction())
- switch (F->getIntrinsicID()) {
- case Intrinsic::not_intrinsic:
- case Intrinsic::vastart:
- case Intrinsic::vacopy:
- case Intrinsic::vaend:
- case Intrinsic::returnaddress:
- case Intrinsic::frameaddress:
- case Intrinsic::setjmp:
- case Intrinsic::longjmp:
- case Intrinsic::prefetch:
- case Intrinsic::powi:
- case Intrinsic::x86_sse_cmp_ss:
- case Intrinsic::x86_sse_cmp_ps:
- case Intrinsic::x86_sse2_cmp_sd:
- case Intrinsic::x86_sse2_cmp_pd:
- case Intrinsic::ppc_altivec_lvsl:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
- // We directly implement these intrinsics
- break;
- default:
- // If this is an intrinsic that directly corresponds to a GCC
- // builtin, we handle it.
- const char *BuiltinName = "";
-#define GET_GCC_BUILTIN_NAME
-#include "llvm/Intrinsics.gen"
-#undef GET_GCC_BUILTIN_NAME
- // If we handle it, don't lower it.
- if (BuiltinName[0]) break;
-
- // All other intrinsic calls we must lower.
- Instruction *Before = 0;
- if (CI != &BB->front())
- Before = prior(BasicBlock::iterator(CI));
-
- IL->LowerIntrinsicCall(CI);
- if (Before) { // Move iterator to instruction after call
- I = Before; ++I;
- } else {
- I = BB->begin();
- }
- // If the intrinsic got lowered to another call, and that call has
- // a definition then we need to make sure its prototype is emitted
- // before any calls to it.
- if (CallInst *Call = dyn_cast<CallInst>(I))
- if (Function *NewF = Call->getCalledFunction())
- if (!NewF->isDeclaration())
- prototypesToGen.push_back(NewF);
-
- break;
- }
-
- // We may have collected some prototypes to emit in the loop above.
- // Emit them now, before the function that uses them is emitted. But,
- // be careful not to emit them twice.
- std::vector<Function*>::iterator I = prototypesToGen.begin();
- std::vector<Function*>::iterator E = prototypesToGen.end();
- for ( ; I != E; ++I) {
- if (intrinsicPrototypesAlreadyGenerated.insert(*I).second) {
- Out << '\n';
- emitFunctionSignature(*I, true);
- Out << ";\n";
- }
- }
- }
-
void GenWriter::visitCallInst(CallInst &I)
{
+ Value *dst = &I;
+ Value *Callee = I.getCalledValue();
+ GBE_ASSERT(ctx.getFunction().getProfile() == ir::PROFILE_OCL);
+ GBE_ASSERT(isa<InlineAsm>(I.getCalledValue()) == false);
+ GBE_ASSERT(I.hasStructRetAttr() == false);
+#if GBE_DEBUG
+ if (Function *F = I.getCalledFunction())
+ GBE_ASSERT(F->getIntrinsicID() == 0);
+#endif /* GBE_DEBUG */
+ // With OCL there is no side effect for any called functions. So do nothing
+ // when there is no returned value
+ if (I.getType() == Type::getVoidTy(I.getContext()))
+ return;
+
+ // Get the name of the called function and handle it. We should use a hash
+ // map later
+ const std::string fnName = Callee->getName();
+ if (fnName == "__gen_ocl_get_global_id0")
+ this->registerMap[dst] = ir::ocl::gid0;
+ else if (fnName == "__gen_ocl_get_global_id1")
+ this->registerMap[dst] = ir::ocl::gid1;
+ else if (fnName == "__gen_ocl_get_global_id2")
+ this->registerMap[dst] = ir::ocl::gid2;
+ else if (fnName == "__gen_ocl_get_local_id0")
+ this->registerMap[dst] = ir::ocl::lid0;
+ else if (fnName == "__gen_ocl_get_local_id1")
+ this->registerMap[dst] = ir::ocl::lid1;
+ else if (fnName == "__gen_ocl_get_local_id2")
+ this->registerMap[dst] = ir::ocl::lid2;
+
#if 0
- if (isa<InlineAsm>(I.getCalledValue()))
return visitInlineAsm(I);
bool WroteCallee = false;
return;
Value *Callee = I.getCalledValue();
-
+ Out << (Callee->getName());
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
Out << ')';
}
- void GenWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
- gep_type_iterator E, bool Static) {
-
- // If there are no indices, just print out the pointer.
- if (I == E) {
- writeOperand(Ptr);
- return;
- }
-
- // Find out if the last index is into a vector. If so, we have to print this
- // specially. Since vectors can't have elements of indexable type, only the
- // last index could possibly be of a vector element.
- VectorType *LastIndexIsVector = 0;
- {
- for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
- LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
- }
-
- Out << "(";
-
- // If the last index is into a vector, we can't print it as &a[i][j] because
- // we can't index into a vector with j in GCC. Instead, emit this as
- // (((float*)&a[i])+j)
- if (LastIndexIsVector) {
- Out << "((";
- printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType()));
- Out << ")(";
- }
-
- Out << '&';
-
- // If the first index is 0 (very typical) we can do a number of
- // simplifications to clean up the code.
- Value *FirstOp = I.getOperand();
- if (!isa<Constant>(FirstOp) || !cast<Constant>(FirstOp)->isNullValue()) {
- // First index isn't simple, print it the hard way.
- writeOperand(Ptr);
- } else {
- ++I; // Skip the zero index.
-
- // Okay, emit the first operand. If Ptr is something that is already address
- // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead.
- if (isAddressExposed(Ptr)) {
- writeOperandInternal(Ptr, Static);
- } else if (I != E && (*I)->isStructTy()) {
- // If we didn't already emit the first operand, see if we can print it as
- // P->f instead of "P[0].f"
- writeOperand(Ptr);
- Out << "->field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
- ++I; // eat the struct index as well.
- } else {
- // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic.
- Out << "(*";
- writeOperand(Ptr);
- Out << ")";
- }
- }
-
- for (; I != E; ++I) {
- if ((*I)->isStructTy()) {
- Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
- } else if ((*I)->isArrayTy()) {
- Out << ".array[";
- writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
- Out << ']';
- } else if (!(*I)->isVectorTy()) {
- Out << '[';
- writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
- Out << ']';
- } else {
- // If the last index is into a vector, then print it out as "+j)". This
- // works with the 'LastIndexIsVector' code above.
- if (isa<Constant>(I.getOperand()) &&
- cast<Constant>(I.getOperand())->isNullValue()) {
- Out << "))"; // avoid "+0".
- } else {
- Out << ")+(";
- writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
- Out << "))";
- }
- }
- }
- Out << ")";
- }
-
static INLINE ir::MemorySpace addressSpaceLLVMToGen(unsigned llvmMemSpace) {
switch (llvmMemSpace) {
case 0: return ir::MEM_GLOBAL;
this->visitLoadOrStore<false>(I);
}
- void GenWriter::visitGetElementPtrInst(GetElementPtrInst &I) {
- printGEPExpression(I.getPointerOperand(), gep_type_begin(I),
- gep_type_end(I), false);
- }
-
+#if 0
void GenWriter::visitInsertElementInst(InsertElementInst &I) {
Type *EltTy = I.getType()->getElementType();
writeOperand(I.getOperand(0));
} else {
Value *Op = SVI.getOperand((unsigned)SrcVal >= NumElts);
if (isa<Instruction>(Op)) {
- // Do an extractelement of this value from the appropriate input.
+ // Do an extractelement of this value from the appropriate i. So do
+ // nothing when there is no returned valuenput.
Out << "((";
printType(Out, PointerType::getUnqual(EltTy));
Out << ")(&" << GetValueName(Op)
}
Out << ")";
}
+#endif
llvm::FunctionPass *createGenPass(ir::Unit &unit) {
return new GenWriter(unit);
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file llvm_gen_backend.hpp
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ *
+ * Pass generation functions
+ */
+#ifndef __GBE_LLVM_GEN_BACKEND_HPP__
+#define __GBE_LLVM_GEN_BACKEND_HPP__
+
+#include "llvm/Pass.h"
+#include "sys/platform.hpp"
+
+// LLVM Type
+namespace llvm { class Type; }
+
+namespace gbe
+{
+ // Final target of the Gen backend
+ namespace ir { class Unit; }
+
+ /*! Pad the offset */
+ uint32_t getPadding(uint32_t offset, uint32_t align);
+
+ /*! Get the type alignment in bytes */
+ uint32_t getAlignmentByte(const ir::Unit &unit, llvm::Type* Ty);
+
+ /*! Get the type size in bits */
+ uint32_t getTypeBitSize(const ir::Unit &unit, llvm::Type* Ty);
+
+ /*! Get the type size in bytes */
+ uint32_t getTypeByteSize(const ir::Unit &unit, llvm::Type* Ty);
+
+ /*! Create a Gen-IR unit */
+ llvm::FunctionPass *createGenPass(ir::Unit &unit);
+
+ /*! Remove the GEP instructions */
+ llvm::BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit);
+
+} /* namespace gbe */
+
+#endif /* __GBE_LLVM_GEN_BACKEND_HPP__ */
+
--- /dev/null
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+/**
+ * \file llvm_to_gen.cpp
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ */
+
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ConstantsScanner.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Config/config.h"
+
+#include "llvm/llvm_gen_backend.hpp"
+#include "ir/unit.hpp"
+#include "sys/map.hpp"
+
+using namespace llvm;
+
+namespace gbe
+{
+ uint32_t getPadding(uint32_t offset, uint32_t align) {
+ return (align - (offset % align)) % align;
+ }
+
+ uint32_t getAlignmentByte(const ir::Unit &unit, Type* Ty)
+ {
+ const uint32_t MAX_ALIGN = 8; //maximum size is 8 for doubles
+
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: NOT_SUPPORTED;
+ case Type::VectorTyID: NOT_SUPPORTED;
+ case Type::PointerTyID:
+ case Type::IntegerTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ return getTypeBitSize(unit, Ty)/8;
+ case Type::ArrayTyID:
+ return getAlignmentByte(unit, cast<ArrayType>(Ty)->getElementType());
+ case Type::StructTyID:
+ {
+ const StructType* StrTy = cast<StructType>(Ty);
+ uint32_t maxa = 0;
+ for(uint32_t subtype = 0; subtype < StrTy->getNumElements(); subtype++)
+ {
+ maxa = std::max(getAlignmentByte(unit, StrTy->getElementType(subtype)), maxa);
+ if(maxa==MAX_ALIGN)
+ return maxa;
+ }
+ return maxa;
+ }
+ default: NOT_SUPPORTED;
+ }
+ return 0u;
+ }
+
+ uint32_t getTypeBitSize(const ir::Unit &unit, Type* Ty)
+ {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: NOT_SUPPORTED;
+ case Type::PointerTyID: return unit.getPointerSize();
+ case Type::IntegerTyID: return cast<IntegerType>(Ty)->getBitWidth();
+ case Type::FloatTyID: return 32;
+ case Type::DoubleTyID: return 64;
+ case Type::VectorTyID:
+ {
+ const VectorType* VecTy = cast<VectorType>(Ty);
+ return VecTy->getNumElements() * getTypeBitSize(unit, VecTy->getElementType());
+ }
+ case Type::ArrayTyID:
+ {
+ const ArrayType* ArrTy = cast<ArrayType>(Ty);
+ Type* elementType = ArrTy->getElementType();
+ uint32_t size_element = getTypeBitSize(unit, elementType);
+ uint32_t size = ArrTy->getNumElements() * size_element;
+ uint32_t align = 8 * getAlignmentByte(unit, elementType);
+ size += (ArrTy->getNumElements()-1) * getPadding(size_element, align);
+ return size;
+ }
+ case Type::StructTyID:
+ {
+ const StructType* StrTy = cast<StructType>(Ty);
+ uint32_t size = 0;
+ for(uint32_t subtype=0; subtype < StrTy->getNumElements(); subtype++)
+ {
+ Type* elementType = StrTy->getElementType(subtype);
+ uint32_t align = 8 * getAlignmentByte(unit, elementType);
+ size += getPadding(size, align);
+ size += getTypeBitSize(unit, elementType);
+ }
+ return size;
+ }
+ default: NOT_SUPPORTED;
+ }
+ return 0u;
+ }
+
+ uint32_t getTypeByteSize(const ir::Unit &unit, Type* Ty)
+ {
+ uint32_t size_bit = getTypeBitSize(unit, Ty);
+ assert((size_bit%8==0) && "no multiple of 8");
+ return size_bit/8;
+ }
+
+ class GenRemoveGEPPasss : public BasicBlockPass
+ {
+
+ public:
+ static char ID;
+#define FORMER_VERSION 0
+#if FORMER_VERSION
+ GenRemoveGEPPasss(map<const Value *, const Value *>&
+ parentCompositePointer)
+ : BasicBlockPass(ID),
+ parentPointers(parentCompositePointer) {}
+ map<const Value *, const Value *>& parentPointers;
+#else
+ GenRemoveGEPPasss(const ir::Unit &unit) :
+ BasicBlockPass(ID),
+ unit(unit) {}
+ const ir::Unit &unit;
+#endif
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+
+ virtual const char *getPassName() const {
+ return "PTX backend: insert special ptx instructions";
+ }
+
+ bool simplifyGEPInstructions(GetElementPtrInst* GEPInst);
+
+ virtual bool runOnBasicBlock(BasicBlock &BB)
+ {
+ bool changedBlock = false;
+ iplist<Instruction>::iterator I = BB.getInstList().begin();
+ for (auto nextI = I, E = --BB.getInstList().end(); I != E; I = nextI) {
+ iplist<Instruction>::iterator I = nextI++;
+ if(GetElementPtrInst* gep = dyn_cast<GetElementPtrInst>(&*I))
+ changedBlock = (simplifyGEPInstructions(gep) || changedBlock);
+ }
+ return changedBlock;
+ }
+ };
+
+ char GenRemoveGEPPasss::ID = 0;
+
+ bool GenRemoveGEPPasss::simplifyGEPInstructions(GetElementPtrInst* GEPInst)
+ {
+ const uint32_t ptrSize = unit.getPointerSize();
+ Value* parentPointer = GEPInst->getOperand(0);
+#if FORMER_VERSION
+ Value* topParent = parentPointer;
+#endif
+ CompositeType* CompTy = cast<CompositeType>(parentPointer->getType());
+
+ if(isa<GlobalVariable>(parentPointer)) //HACK: !!!!
+ {
+#if FORMER_VERSION
+ Function *constWrapper =
+ Function::Create(FunctionType::get(parentPointer->getType(),true),
+ GlobalValue::ExternalLinkage,
+ Twine(CONSTWRAPPERNAME));
+
+ std::vector<Value*> params;
+ params.push_back(parentPointer);
+
+ //create and insert wrapper call
+ CallInst * wrapperCall =
+ CallInst::Create(constWrapper,params.begin(), params.end(),"",GEPInst);
+ parentPointer = wrapperCall;
+#else
+ NOT_IMPLEMENTED;
+#endif
+ }
+
+ Value* currentAddrInst =
+ new PtrToIntInst(parentPointer, IntegerType::get(GEPInst->getContext(), ptrSize), "", GEPInst);
+
+ uint32_t constantOffset = 0;
+
+ for(uint32_t op=1; op<GEPInst->getNumOperands(); ++op)
+ {
+ uint32_t TypeIndex;
+ //we have a constant struct/array acces
+ if(ConstantInt* ConstOP = dyn_cast<ConstantInt>(GEPInst->getOperand(op)))
+ {
+ uint32_t offset = 0;
+ TypeIndex = ConstOP->getZExtValue();
+ for(uint32_t ty_i=0; ty_i<TypeIndex; ty_i++)
+ {
+ Type* elementType = CompTy->getTypeAtIndex(ty_i);
+ uint32_t align = getAlignmentByte(unit, elementType);
+ offset += getPadding(offset, align);
+ offset += getTypeByteSize(unit, elementType);
+ }
+
+ //add getPaddingding for accessed type
+ const uint32_t align = getAlignmentByte(unit, CompTy->getTypeAtIndex(TypeIndex));
+ offset += getPadding(offset, align);
+
+ constantOffset += offset;
+ }
+ // none constant index (=> only array/verctor allowed)
+ else
+ {
+ // we only have array/vectors here,
+ // therefore all elements have the same size
+ TypeIndex = 0;
+
+ Type* elementType = CompTy->getTypeAtIndex(TypeIndex);
+ uint32_t size = getTypeByteSize(unit, elementType);
+
+ //add padding
+ uint32_t align = getAlignmentByte(unit, elementType);
+ size += getPadding(size, align);
+
+ Constant* newConstSize =
+ ConstantInt::get(IntegerType::get(GEPInst->getContext(), ptrSize), size);
+
+ Value *operand = GEPInst->getOperand(op);
+
+ //HACK TODO: Inserted by type replacement.. this code could break something????
+ if(getTypeByteSize(unit, operand->getType())>4)
+ {
+ GBE_ASSERTM(false, "CHECK IT");
+ operand->dump();
+
+ //previous instruction is sext or zext instr. ignore it
+ CastInst *cast = dyn_cast<CastInst>(operand);
+ if(cast && (isa<ZExtInst>(operand) || isa<SExtInst>(operand)))
+ {
+ //hope that CastInst is a s/zext
+ operand = cast->getOperand(0);
+ }
+ else
+ {
+ //trunctate
+ operand =
+ new TruncInst(operand,
+ IntegerType::get(GEPInst->getContext(),
+ ptrSize),
+ "", GEPInst);
+ }
+ }
+
+ BinaryOperator* tmpMul =
+ BinaryOperator::Create(Instruction::Mul, newConstSize, operand,
+ "", GEPInst);
+ currentAddrInst =
+ BinaryOperator::Create(Instruction::Add, currentAddrInst, tmpMul,
+ "", GEPInst);
+ }
+
+ //step down in type hirachy
+ CompTy = dyn_cast<CompositeType>(CompTy->getTypeAtIndex(TypeIndex));
+ }
+
+ //insert addition of new offset before GEPInst
+ Constant* newConstOffset =
+ ConstantInt::get(IntegerType::get(GEPInst->getContext(),
+ ptrSize),
+ constantOffset);
+ currentAddrInst =
+ BinaryOperator::Create(Instruction::Add, currentAddrInst,
+ newConstOffset, "", GEPInst);
+
+ //convert offset to ptr type (nop)
+ IntToPtrInst* intToPtrInst =
+ new IntToPtrInst(currentAddrInst,GEPInst->getType(),"", GEPInst);
+
+ //replace uses of the GEP instruction with the newly calculated pointer
+ GEPInst->replaceAllUsesWith(intToPtrInst);
+ GEPInst->dropAllReferences();
+ GEPInst->removeFromParent();
+
+#if FORMER_VERSION
+ //insert new pointer into parent list
+ while(parentPointers.find(topParent)!=parentPointers.end())
+ topParent = parentPointers.find(topParent)->second;
+ parentPointers[intToPtrInst] = topParent;
+#endif
+
+ return true;
+ }
+
+ BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit) {
+ return new GenRemoveGEPPasss(unit);
+ }
+} /* namespace gbe */
+
* Author: Benjamin Segovia <benjamin.segovia@intel.com>
*/
-//===-- llc.cpp - Implement the LLVM Native Code Generator ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is the llc code generator driver. It provides a convenient
-// command-line interface for generating native assembly-language code
-// or C code, given LLVM bitcode.
-//
-//===----------------------------------------------------------------------===//
+/**
+ * \file llvm_to_gen.cpp
+ * \author Benjamin Segovia <benjamin.segovia@intel.com>
+ */
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/IRReader.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/llvm_gen_backend.hpp"
#include "llvm/llvm_to_gen.hpp"
#include "sys/platform.hpp"
namespace gbe
{
- llvm::FunctionPass *createGenPass(ir::Unit &unit);
-
void llvmToGen(ir::Unit &unit, const char *fileName)
{
using namespace llvm;
Module &mod = *M.get();
llvm::PassManager passes;
+ passes.add(createRemoveGEPPass(unit));
+ passes.add(createConstantPropagationPass());
+ passes.add(createDeadInstEliminationPass()); // remove simplified instructions
+ passes.add(createLowerSwitchPass());
+ passes.add(createPromoteMemoryToRegisterPass());
+ passes.add(createGVNPass()); // Remove redundancies
passes.add(createGenPass(unit));
passes.run(mod);
}
#else
#define GBE_ASSERT(EXPR) do { } while (0)
#define GBE_ASSERTM(EXPR, MSG) do { } while (0)
-#endif
+#endif /* GBE_DEBUG */
+
+#define NOT_IMPLEMENTED GBE_ASSERTM (false, "Not implemented")
+#define NOT_SUPPORTED GBE_ASSERTM (false, "Not supported")
/*! Fatal error macros */
-#define NOT_IMPLEMENTED FATAL ("Not implemented")
#define FATAL_IF(COND, MSG) \
do { \
if(UNLIKELY(COND)) FATAL(MSG); \