+#include <stdlib.h>
__kernel unsigned int add(unsigned int x, unsigned int y)
{
return x + y;
+#include <stdlib.h>
struct big{
unsigned int a, b;
};
+#include <stdlib.h>
__kernel void test_cmp(__global bool *dst, int x, int y, float z, float w)
{
dst[0] = (x < y) + (z > w);
-#include "stdlib.h"
+#include <stdlib.h>
__kernel void cmp_cvt(__global int *dst, int x, int y)
{
+#include <stdlib.h>
__kernel void cycle(global int *dst)
{
int x, y;
-#include "stdlib.h"
+#include <stdlib.h>
__kernel void extract(__global int4 *dst, __global int4 *src, int c)
{
const int4 from = src[0];
+#include <stdlib.h>
struct struct0
{
int hop[5];
int x, y, z;
};
-#define __global __attribute__((address_space(1)))
__kernel void param(__global struct struct0 *dst, struct struct0 s, __local int *h, int x, int y)
{
s.hop[4] += x + h[4];
-#include "stdlib.h"
+#include <stdlib.h>
__kernel void test_global_id(__global int *dst, __global int *p)
{
-#include "stdlib.h"
+#include <stdlib.h>
+
__kernel void insert(__global int4 *dst, __global int4 *src, int c)
{
int4 x = src[0];
+#include <stdlib.h>
__kernel void add(__global int *dst, unsigned int x)
{
for (int i = 0; i < x; ++i) dst[i]++;
-typedef float float4 __attribute__((ext_vector_type(4)));
+#include <stdlib.h>
__attribute__((pure, overloadable)) int mad(int,int,int);
__attribute__((pure, overloadable)) float mad(float,float,float);
__attribute__((pure, overloadable)) float4 mad(float4,float4,float4);
-#include "stdlib.h"
+#include <stdlib.h>
__kernel void test_select(__global int4 *dst,
__global int4 *src0,
+#include <stdlib.h>
__kernel void short_write(__global short *dst, short x, short y)
{
dst[0] = x + y;
-#include "stdlib.h"
+#include <stdlib.h>
__kernel void shuffle(__global int4 *dst, __global int4 *src, int c)
{
const int4 from = src[0];
-#include "stdlib.h"
+#include <stdlib.h>
__kernel void simple_float4(__global float4 *dst, __global float4 *src)
{
-#include "stdlib.h"
+#include <stdlib.h>
__kernel void simple_float4(__global float4 *dst, __global float4 *src)
{
-#include "stdlib.h"
+#include <stdlib.h>
__kernel void simple_float4(__global float4 *dst, __global float4 *src, bool b)
{
return dst;
}
+#define __global __attribute__((address_space(1)))
+#define global __global
+
+#include <stdlib.h>
__kernel void store(__global int *dst, __local int *dst0, int x)
{
dst[0] = 1;
+#include <stdlib.h>
struct my_struct {
int a;
int b[2];
+#include <stdlib.h>
struct my_struct {
int a;
int b[2];
-#include "stdlib.h"
+#include <stdlib.h>
__kernel void test_select(__global int *dst, __global int *src)
{
+#include <stdlib.h>
__kernel void undefined(__global int *dst)
{
int x;
+#include <stdlib.h>
__kernel void hop() {}
BasicBlock *bb; //!< Basic block currently processed
vector<uint8_t> *usedLabels; //!< Store all labels that are defined
};
- vector<StackElem> fnStack; //!< Stack of functions still to finish
+ vector<StackElem> fnStack; //!< Stack of functions still to finish
GBE_CLASS(Context);
};
namespace ir {
Function::Function(const std::string &name, Profile profile) :
- name(name), structReturned(false), profile(profile)
+ name(name), profile(profile)
{
initProfile(*this);
}
std::ostream &operator<< (std::ostream &out, const Function &fn)
{
out << ".decl_function " << fn.getName() << std::endl;
- out << ".return_struct " << (fn.isStructReturned() ? "true" : "false") << std::endl;
out << fn.getRegisterFile();
out << "## " << fn.inputNum() << " input register"
<< plural(fn.inputNum()) << " ##" << std::endl;
GBE_ASSERT(blocks[ID] != NULL);
return *blocks[ID];
}
- /*! Function returns a structure by pointer (see ptx32 ABI) */
- INLINE void setStructReturned(bool isReturned) { structReturned = isReturned; }
- /*! Indicate if a structure is returned from the function */
- INLINE bool isStructReturned(void) const { return structReturned; }
/*! Create a new label (still not bound to a basic block) */
LabelIndex newLabel(void);
/*! Number of registers in the register file */
vector<BasicBlock*> blocks; //!< All chained basic blocks
RegisterFile file; //!< RegisterDatas used by the instructions
GrowingPool<Instruction> insnPool; //!< For fast instruction allocation
- bool structReturned; //!< First argument is pointer to struct
Profile profile; //!< Current function profile
GBE_CLASS(Function);
};
void GenWriter::emitFunctionPrototype(Function &F)
{
- const bool returnStruct = F.hasStructRetAttr();
-
+ GBE_ASSERTM(F.hasStructRetAttr() == false,
+ "Returned value for kernel functions");
// Loop over the arguments and output registers for them
if (!F.arg_empty()) {
Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
- // When a struct is returned, first argument is pointer to the structure
- if (returnStruct)
- ctx.getFunction().setStructReturned(true);
-
// Insert a new register for each function argument
for (; I != E; ++I) {
const Type *type = I->getType();
// When returning a structure, first input register is the pointer to the
// structure
- if (!returnStruct) {
- const Type *type = F.getReturnType();
- if (type->isVoidTy() == false) {
- const ir::RegisterFamily family = getFamily(ctx, type);
- const ir::Register reg = ctx.reg(family);
- ctx.output(reg);
- }
- }
+ const Type *type = F.getReturnType();
+ GBE_ASSERTM(type->isVoidTy() == true,
+ "Returned value for kernel functions");
#if GBE_DEBUG
// Variable number of arguments is not supported
void GenWriter::emitFunction(Function &F)
{
+ switch (F.getCallingConv()) {
+ case CallingConv::PTX_Device: // we do not emit device function
+ return;
+ case CallingConv::PTX_Kernel:
+ break;
+ default: GBE_ASSERTM(false, "Unsupported calling convention");
+ }
+
ctx.startFunction(F.getName());
this->regTranslator.clear();
this->labelMap.clear();
case Instruction::LShr: ctx.SHR(type, dst, src0, src1); break;
case Instruction::AShr: ctx.ASR(type, dst, src0, src1); break;
default: NOT_SUPPORTED;
- };
+ }
}
}
case ICmpInst::ICMP_UGT: ctx.LE(unsignedType, dst, src0, src1); break;
case ICmpInst::ICMP_SGT: ctx.LE(signedType, dst, src0, src1); break;
default: NOT_SUPPORTED;
- };
+ }
}
// Nothing special to do
else {
case ICmpInst::ICMP_UGT: ctx.GT(unsignedType, dst, src0, src1); break;
case ICmpInst::ICMP_SGT: ctx.GT(signedType, dst, src0, src1); break;
default: NOT_SUPPORTED;
- };
+ }
}
}
}
case ICmpInst::FCMP_OGT:
case ICmpInst::FCMP_UGT: ctx.GT(type, dst, src0, src1); break;
default: NOT_SUPPORTED;
- };
+ }
}
}
}
break;
default: NOT_SUPPORTED;
- };
+ }
}
/*! Once again, it is a templated functor. No lambda */
static INLINE ir::MemorySpace addressSpaceLLVMToGen(unsigned llvmMemSpace) {
switch (llvmMemSpace) {
- case 0: return ir::MEM_GLOBAL;
+ case 0: return ir::MEM_PRIVATE;
+ case 1: return ir::MEM_GLOBAL;
case 4: return ir::MEM_LOCAL;
}
GBE_ASSERT(false);
fclose(dummyKernel);
//UTEST_EXPECT_SUCCESS(utestLLVM2Gen("function_param.ll"));
- UTEST_EXPECT_SUCCESS(utestLLVM2Gen("loop.ll"));
+ //UTEST_EXPECT_SUCCESS(utestLLVM2Gen("loop.ll"));
+ UTEST_EXPECT_SUCCESS(utestLLVM2Gen("function.ll"));
//UTEST_EXPECT_SUCCESS(utestLLVM2Gen("mad.ll"));
#if 0
UTEST_EXPECT_SUCCESS(utestLLVM2Gen("select.ll"));