for (valueID = 0; valueID < insn.getDstNum(); ++valueID)
dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType());
- if (!insn.is3D())
- srcNum--;
+ GBE_ASSERT(srcNum == 3);
+ if (insn.getSrc(1) == ir::ocl::invalid) // V is absent: 1D access, only U is a real source
+ srcNum = 1;
+ else if (insn.getSrc(2) == ir::ocl::invalid) // W is absent: 2D access, U and V are real sources
+ srcNum = 2;
if (insn.getSamplerOffset() != 0) {
- // U, lod, V, [W]
+ // U, lod, [V], [W]
GBE_ASSERT(insn.getSrcType() != TYPE_FLOAT);
msgPayloads[0] = sel.selReg(insn.getSrc(0), insn.getSrcType());
msgPayloads[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
- msgPayloads[2] = sel.selReg(insn.getSrc(1), insn.getSrcType());
+ if (srcNum > 1)
+ msgPayloads[2] = sel.selReg(insn.getSrc(1), insn.getSrcType());
if (srcNum > 2)
msgPayloads[3] = sel.selReg(insn.getSrc(2), insn.getSrcType());
// Clear the lod to zero.
msgs[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
for(uint32_t msgID = 1; msgID < 1 + coordNum; msgID++, valueID++)
msgs[msgID] = sel.selReg(insn.getSrc(msgID - 1), insn.getCoordType());
+
+ // fake v (src 1 is the V coordinate; it is ir::ocl::invalid for 1D images).
+ if (insn.getSrc(1) == ir::ocl::invalid)
+ msgs[2] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
// fake w.
- if (!insn.is3D())
+ if (insn.getSrc(2) == ir::ocl::invalid)
msgs[3] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
// LOD.
msgs[4] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
uint32_t bti = insn.getImageIndex();
if (simdWidth == 8)
- sel.TYPED_WRITE(msgs, msgNum, bti, insn.is3D());
+ sel.TYPED_WRITE(msgs, msgNum, bti, insn.getSrc(2) != ir::ocl::invalid);
else {
sel.push();
sel.curr.execWidth = 8;
sel.curr.quarterControl = (quarter == 0) ? GEN_COMPRESSION_Q1 : GEN_COMPRESSION_Q2;
// Set U,V,W
QUARTER_MOV0(msgs, 1, sel.selReg(insn.getSrc(0), insn.getCoordType()));
- QUARTER_MOV0(msgs, 2, sel.selReg(insn.getSrc(1), insn.getCoordType()));
- if (insn.is3D())
+ if (insn.getSrc(1) != ir::ocl::invalid) // V present: not 1D
+ QUARTER_MOV0(msgs, 2, sel.selReg(insn.getSrc(1), insn.getCoordType()));
+ if (insn.getSrc(2) != ir::ocl::invalid) // W present: 3D
QUARTER_MOV0(msgs, 3, sel.selReg(insn.getSrc(2), insn.getCoordType()));
// Set R, G, B, A
QUARTER_MOV1(msgs, 5, sel.selReg(insn.getSrc(3), insn.getSrcType()));
QUARTER_MOV1(msgs, 6, sel.selReg(insn.getSrc(4), insn.getSrcType()));
QUARTER_MOV1(msgs, 7, sel.selReg(insn.getSrc(5), insn.getSrcType()));
QUARTER_MOV1(msgs, 8, sel.selReg(insn.getSrc(6), insn.getSrcType()));
- sel.TYPED_WRITE(msgs, msgNum, bti, insn.is3D());
+ sel.TYPED_WRITE(msgs, msgNum, bti, insn.getSrc(2) != ir::ocl::invalid);
#undef QUARTER_MOV0
#undef QUARTER_MOV1
}
public TupleDstPolicy<SampleInstruction>
{
public:
- SampleInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset, bool is3D) {
+ SampleInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset) {
this->opcode = OP_SAMPLE;
this->dst = dstTuple;
this->src = srcTuple;
this->samplerIdx = sampler;
this->imageIdx = imageIdx;
this->samplerOffset = samplerOffset;
- this->is3DRead = is3D;
}
INLINE bool wellFormed(const Function &fn, std::string &why) const;
INLINE void out(std::ostream &out, const Function &fn) const {
INLINE Type getDstType(void) const { return this->dstIsFloat ? TYPE_FLOAT : TYPE_U32; }
INLINE const uint8_t getSamplerIndex(void) const { return this->samplerIdx; }
INLINE const uint8_t getSamplerOffset(void) const { return this->samplerOffset; }
- INLINE const bool is3D(void) const { return !!this->is3DRead; }
uint8_t srcIsFloat:1;
uint8_t dstIsFloat:1;
uint8_t samplerIdx:4;
uint8_t samplerOffset:1;
- uint8_t is3DRead:1;
uint8_t imageIdx;
static const uint32_t srcNum = 3;
static const uint32_t dstNum = 4;
{
public:
- INLINE TypedWriteInstruction(uint8_t imageIdx, Tuple srcTuple, Type srcType, Type coordType, bool is3D) {
+ INLINE TypedWriteInstruction(uint8_t imageIdx, Tuple srcTuple, Type srcType, Type coordType) {
this->opcode = OP_TYPED_WRITE;
this->src = srcTuple;
this->coordType = coordType;
this->srcType = srcType;
this->imageIdx = imageIdx;
- this->is3DWrite = is3D;
}
INLINE bool wellFormed(const Function &fn, std::string &why) const;
INLINE void out(std::ostream &out, const Function &fn) const {
uint8_t srcType;
uint8_t coordType;
uint8_t imageIdx;
- uint8_t is3DWrite;
-
- INLINE const bool is3D(void) const { return !!this->is3DWrite; }
INLINE const uint8_t getImageIndex(void) const { return this->imageIdx; }
INLINE Type getSrcType(void) const { return (Type)this->srcType; }
DECL_MEM_FN(SampleInstruction, Type, getSrcType(void), getSrcType())
DECL_MEM_FN(SampleInstruction, Type, getDstType(void), getDstType())
DECL_MEM_FN(SampleInstruction, const uint8_t, getSamplerIndex(void), getSamplerIndex())
-DECL_MEM_FN(SampleInstruction, const bool, is3D(void), is3D())
DECL_MEM_FN(SampleInstruction, const uint8_t, getSamplerOffset(void), getSamplerOffset())
DECL_MEM_FN(SampleInstruction, const uint8_t, getImageIndex(void), getImageIndex())
DECL_MEM_FN(TypedWriteInstruction, Type, getSrcType(void), getSrcType())
DECL_MEM_FN(TypedWriteInstruction, Type, getCoordType(void), getCoordType())
DECL_MEM_FN(TypedWriteInstruction, const uint8_t, getImageIndex(void), getImageIndex())
-DECL_MEM_FN(TypedWriteInstruction, const bool, is3D(void), is3D())
DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
DECL_MEM_FN(GetImageInfoInstruction, const uint8_t, getImageIndex(void), getImageIndex())
}
// SAMPLE
- Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset, bool is3D) {
- return internal::SampleInstruction(imageIndex, dst, src, dstIsFloat, srcIsFloat, sampler, samplerOffset, is3D).convert();
+ Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset) {
+ return internal::SampleInstruction(imageIndex, dst, src, dstIsFloat, srcIsFloat, sampler, samplerOffset).convert();
}
- Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType, bool is3D) {
- return internal::TypedWriteInstruction(imageIndex, src, srcType, coordType, is3D).convert();
+ Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType) {
+ return internal::TypedWriteInstruction(imageIndex, src, srcType, coordType).convert();
}
Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t imageIndex, Register infoReg) {
class TypedWriteInstruction : public Instruction {
public:
/*! Return true if the given instruction is an instance of this class */
- const bool is3D() const;
static bool isClassOf(const Instruction &insn);
const uint8_t getImageIndex() const;
Type getSrcType(void) const;
/*! Load texels from a texture */
class SampleInstruction : public Instruction {
public:
- const bool is3D() const;
const uint8_t getImageIndex() const;
const uint8_t getSamplerIndex(void) const;
const uint8_t getSamplerOffset(void) const;
/*! sync.params... (see Sync instruction) */
Instruction SYNC(uint32_t parameters);
/*! typed write */
- Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType, bool is3D);
+ Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType);
/*! sample textures */
- Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset, bool is3D);
+ Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset);
/*! get image information , such as width/height/depth/... */
Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t imageIndex, Register infoReg);
/*! label labelIndex */
"barrier_id", "thread_number", "work_dimension",
"zero", "one",
"retVal", "slm_offset",
- "printf_buffer_pointer", "printf_index_buffer_pointer"
+ "printf_buffer_pointer", "printf_index_buffer_pointer",
+ "invalid"
};
#if GBE_DEBUG
DECL_NEW_REG(FAMILY_WORD, slmoffset, 1);
DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
+ DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
}
#undef DECL_NEW_REG
static const Register slmoffset = Register(27); // Group's SLM offset in total 64K SLM
static const Register printfbptr = Register(28); // printf buffer address .
static const Register printfiptr = Register(29); // printf index buffer address.
- static const uint32_t regNum = 30; // number of special registers
+ static const Register invalid = Register(30); // sentinel register: operands are compared against it to detect an absent source.
+ static const uint32_t regNum = 31; // number of special registers
extern const char *specialRegMean[]; // special register name.
} /* namespace ocl */
GBE_CLASS(RegisterFile);
};
- /*! Useful to encode anything special */
- static const Register invalidRegister(RegisterFile::MAX_INDEX);
-
/*! Output the register file string in the given stream */
std::ostream &operator<< (std::ostream &out, const RegisterFile &file);
case GEN_OCL_LGBARRIER:
ctx.getFunction().setUseSLM(true);
break;
- case GEN_OCL_WRITE_IMAGE_I:
- case GEN_OCL_WRITE_IMAGE_UI:
- case GEN_OCL_WRITE_IMAGE_F:
+ case GEN_OCL_WRITE_IMAGE_I_1D:
+ case GEN_OCL_WRITE_IMAGE_UI_1D:
+ case GEN_OCL_WRITE_IMAGE_F_1D:
+ case GEN_OCL_WRITE_IMAGE_I_2D:
+ case GEN_OCL_WRITE_IMAGE_UI_2D:
+ case GEN_OCL_WRITE_IMAGE_F_2D:
case GEN_OCL_WRITE_IMAGE_I_3D:
case GEN_OCL_WRITE_IMAGE_UI_3D:
case GEN_OCL_WRITE_IMAGE_F_3D:
break;
- case GEN_OCL_READ_IMAGE_I:
- case GEN_OCL_READ_IMAGE_UI:
- case GEN_OCL_READ_IMAGE_F:
+ case GEN_OCL_READ_IMAGE_I_1D:
+ case GEN_OCL_READ_IMAGE_UI_1D:
+ case GEN_OCL_READ_IMAGE_F_1D:
+ case GEN_OCL_READ_IMAGE_I_2D:
+ case GEN_OCL_READ_IMAGE_UI_2D:
+ case GEN_OCL_READ_IMAGE_F_2D:
case GEN_OCL_READ_IMAGE_I_3D:
case GEN_OCL_READ_IMAGE_UI_3D:
case GEN_OCL_READ_IMAGE_F_3D:
- case GEN_OCL_READ_IMAGE_I_I:
- case GEN_OCL_READ_IMAGE_UI_I:
- case GEN_OCL_READ_IMAGE_F_I:
+
+ case GEN_OCL_READ_IMAGE_I_1D_I:
+ case GEN_OCL_READ_IMAGE_UI_1D_I:
+ case GEN_OCL_READ_IMAGE_F_1D_I:
+ case GEN_OCL_READ_IMAGE_I_2D_I:
+ case GEN_OCL_READ_IMAGE_UI_2D_I:
+ case GEN_OCL_READ_IMAGE_F_2D_I:
case GEN_OCL_READ_IMAGE_I_3D_I:
case GEN_OCL_READ_IMAGE_UI_3D_I:
case GEN_OCL_READ_IMAGE_F_3D_I:
default: NOT_IMPLEMENTED;
}
} else {
+ int image_dim;
// Get the name of the called function and handle it
Value *Callee = I.getCalledValue();
const std::string fnName = Callee->getName();
ctx.GET_IMAGE_INFO(infoType, reg, surfaceID, infoReg);
break;
}
- case GEN_OCL_READ_IMAGE_I:
- case GEN_OCL_READ_IMAGE_UI:
- case GEN_OCL_READ_IMAGE_F:
+
+ case GEN_OCL_READ_IMAGE_I_1D:
+ case GEN_OCL_READ_IMAGE_UI_1D:
+ case GEN_OCL_READ_IMAGE_F_1D:
+ case GEN_OCL_READ_IMAGE_I_1D_I:
+ case GEN_OCL_READ_IMAGE_UI_1D_I:
+ case GEN_OCL_READ_IMAGE_F_1D_I:
+ image_dim = 1;
+ goto handle_read_image;
+ case GEN_OCL_READ_IMAGE_I_2D:
+ case GEN_OCL_READ_IMAGE_UI_2D:
+ case GEN_OCL_READ_IMAGE_F_2D:
+ case GEN_OCL_READ_IMAGE_I_2D_I:
+ case GEN_OCL_READ_IMAGE_UI_2D_I:
+ case GEN_OCL_READ_IMAGE_F_2D_I:
+ image_dim = 2;
+ goto handle_read_image;
case GEN_OCL_READ_IMAGE_I_3D:
case GEN_OCL_READ_IMAGE_UI_3D:
case GEN_OCL_READ_IMAGE_F_3D:
- case GEN_OCL_READ_IMAGE_I_I:
- case GEN_OCL_READ_IMAGE_UI_I:
- case GEN_OCL_READ_IMAGE_F_I:
case GEN_OCL_READ_IMAGE_I_3D_I:
case GEN_OCL_READ_IMAGE_UI_3D_I:
case GEN_OCL_READ_IMAGE_F_3D_I:
+ image_dim = 3;
+handle_read_image:
{
GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
const uint8_t sampler = this->appendSampler(AI);
++AI;
- GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
- GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
+ ir::Register ucoord;
+ ir::Register vcoord;
ir::Register wcoord;
- bool is3D = false;
- if (it->second == GEN_OCL_READ_IMAGE_I_3D ||
- it->second == GEN_OCL_READ_IMAGE_UI_3D ||
- it->second == GEN_OCL_READ_IMAGE_F_3D ||
- it->second == GEN_OCL_READ_IMAGE_I_3D_I ||
- it->second == GEN_OCL_READ_IMAGE_UI_3D_I ||
- it->second == GEN_OCL_READ_IMAGE_F_3D_I) {
- GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
- is3D = true;
- } else
- wcoord = ucoord; // not used, just a padding.
+
+ GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
+ if (image_dim > 1) {
+ GBE_ASSERT(AI != AE);
+ vcoord = this->getRegister(*AI);
+ ++AI;
+ } else {
+ vcoord = ir::ocl::invalid;
+ }
+
+ if (image_dim > 2) {
+ GBE_ASSERT(AI != AE);
+ wcoord = this->getRegister(*AI);
+ ++AI;
+ } else {
+ wcoord = ir::ocl::invalid;
+ }
vector<ir::Register> dstTupleData, srcTupleData;
const uint32_t elemNum = 4;
ir::Type dstType = ir::TYPE_U32;
switch(it->second) {
- case GEN_OCL_READ_IMAGE_I:
- case GEN_OCL_READ_IMAGE_UI:
+ case GEN_OCL_READ_IMAGE_I_1D:
+ case GEN_OCL_READ_IMAGE_UI_1D:
+ case GEN_OCL_READ_IMAGE_I_2D:
+ case GEN_OCL_READ_IMAGE_UI_2D:
case GEN_OCL_READ_IMAGE_I_3D:
case GEN_OCL_READ_IMAGE_UI_3D:
- case GEN_OCL_READ_IMAGE_I_I:
- case GEN_OCL_READ_IMAGE_UI_I:
+ case GEN_OCL_READ_IMAGE_I_1D_I:
+ case GEN_OCL_READ_IMAGE_UI_1D_I:
+ case GEN_OCL_READ_IMAGE_I_2D_I:
+ case GEN_OCL_READ_IMAGE_UI_2D_I:
case GEN_OCL_READ_IMAGE_I_3D_I:
case GEN_OCL_READ_IMAGE_UI_3D_I:
dstType = ir::TYPE_U32;
break;
- case GEN_OCL_READ_IMAGE_F:
+ case GEN_OCL_READ_IMAGE_F_1D:
+ case GEN_OCL_READ_IMAGE_F_2D:
case GEN_OCL_READ_IMAGE_F_3D:
- case GEN_OCL_READ_IMAGE_F_I:
+ case GEN_OCL_READ_IMAGE_F_1D_I:
+ case GEN_OCL_READ_IMAGE_F_2D_I:
case GEN_OCL_READ_IMAGE_F_3D_I:
dstType = ir::TYPE_FLOAT;
break;
bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D;
ctx.SAMPLE(surfaceID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT,
- isFloatCoord, sampler, samplerOffset, is3D);
+ isFloatCoord, sampler, samplerOffset);
break;
}
- case GEN_OCL_WRITE_IMAGE_I:
- case GEN_OCL_WRITE_IMAGE_UI:
- case GEN_OCL_WRITE_IMAGE_F:
+
+ case GEN_OCL_WRITE_IMAGE_I_1D:
+ case GEN_OCL_WRITE_IMAGE_UI_1D:
+ case GEN_OCL_WRITE_IMAGE_F_1D:
+ image_dim = 1;
+ goto handle_write_image;
+ case GEN_OCL_WRITE_IMAGE_I_2D:
+ case GEN_OCL_WRITE_IMAGE_UI_2D:
+ case GEN_OCL_WRITE_IMAGE_F_2D:
+ image_dim = 2;
+ goto handle_write_image;
case GEN_OCL_WRITE_IMAGE_I_3D:
case GEN_OCL_WRITE_IMAGE_UI_3D:
case GEN_OCL_WRITE_IMAGE_F_3D:
+ image_dim = 3;
+handle_write_image:
{
GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI;
const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg);
- GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
- GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
- ir::Register wcoord;
- bool is3D = false;
- if(it->second >= GEN_OCL_WRITE_IMAGE_I_3D) {
- GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
- is3D = true;
+ ir::Register ucoord, vcoord, wcoord;
+
+ GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI;
+
+ if (image_dim > 1) {
+ GBE_ASSERT(AI != AE);
+ vcoord = this->getRegister(*AI);
+ ++AI;
} else
- wcoord = ucoord; // not used, just padding.
+ vcoord = ir::ocl::invalid;
+
+ if (image_dim > 2) {
+ GBE_ASSERT(AI != AE);
+ wcoord = this->getRegister(*AI);
+ ++AI;
+ } else {
+ wcoord = ir::ocl::invalid;
+ }
+
GBE_ASSERT(AI != AE);
vector<ir::Register> srcTupleData;
ir::Type srcType = ir::TYPE_U32;
switch(it->second) {
- case GEN_OCL_WRITE_IMAGE_I:
- case GEN_OCL_WRITE_IMAGE_UI:
+ case GEN_OCL_WRITE_IMAGE_I_1D:
+ case GEN_OCL_WRITE_IMAGE_UI_1D:
+ case GEN_OCL_WRITE_IMAGE_I_2D:
+ case GEN_OCL_WRITE_IMAGE_UI_2D:
case GEN_OCL_WRITE_IMAGE_I_3D:
case GEN_OCL_WRITE_IMAGE_UI_3D:
srcType = ir::TYPE_U32;
break;
- case GEN_OCL_WRITE_IMAGE_F:
+ case GEN_OCL_WRITE_IMAGE_F_1D:
+ case GEN_OCL_WRITE_IMAGE_F_2D:
case GEN_OCL_WRITE_IMAGE_F_3D:
srcType = ir::TYPE_FLOAT;
break;
GBE_ASSERT(0); // never been here.
}
- ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32, is3D);
+ ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32);
break;
}
case GEN_OCL_MUL_HI_INT:
DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16)
// To read_image functions.
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I, _Z21__gen_ocl_read_imageijtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI, _Z22__gen_ocl_read_imageuijtffj)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F, _Z21__gen_ocl_read_imagefjtffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D, _Z21__gen_ocl_read_imageijtfj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D, _Z22__gen_ocl_read_imageuijtfj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D, _Z21__gen_ocl_read_imagefjtfj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, _Z21__gen_ocl_read_imageijtffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, _Z22__gen_ocl_read_imageuijtffj)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, _Z21__gen_ocl_read_imagefjtffj)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtfffj)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtfffj)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtfffj)
// work around read image with the LD message. The coords are integer type.
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_I, _Z21__gen_ocl_read_imageijtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_I, _Z22__gen_ocl_read_imageuijtiij)
-DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_I, _Z21__gen_ocl_read_imagefjtiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D_I, _Z21__gen_ocl_read_imageijtij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D_I, _Z22__gen_ocl_read_imageuijtij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D_I, _Z21__gen_ocl_read_imagefjtij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, _Z21__gen_ocl_read_imageijtiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, _Z22__gen_ocl_read_imageuijtiij)
+DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, _Z21__gen_ocl_read_imagefjtiij)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, _Z21__gen_ocl_read_imageijtiiij)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, _Z22__gen_ocl_read_imageuijtiiij)
DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, _Z21__gen_ocl_read_imagefjtiiij)
// To write_image functions.
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I, _Z22__gen_ocl_write_imageijiiDv4_i)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI, _Z23__gen_ocl_write_imageuijiiDv4_j)
-DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F, _Z22__gen_ocl_write_imagefjiiDv4_f)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_1D, _Z22__gen_ocl_write_imageijiDv4_i)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_1D, _Z23__gen_ocl_write_imageuijiDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_1D, _Z22__gen_ocl_write_imagefjiDv4_f)
+
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, _Z22__gen_ocl_write_imageijiiDv4_i)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, _Z23__gen_ocl_write_imageuijiiDv4_j)
+DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, _Z22__gen_ocl_write_imagefjiiDv4_f)
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijiiiDv4_i)
DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijiiiDv4_j)
// Get the function arguments
CallSite CS(call);
- CallSite::arg_iterator CI = CS.arg_begin() + 3;
+ CallSite::arg_iterator CI = CS.arg_begin() + 2;
switch (it->second) {
default: break;
- case GEN_OCL_READ_IMAGE_I:
- case GEN_OCL_READ_IMAGE_UI:
- case GEN_OCL_READ_IMAGE_F:
+ case GEN_OCL_READ_IMAGE_I_1D:
+ case GEN_OCL_READ_IMAGE_UI_1D:
+ case GEN_OCL_READ_IMAGE_F_1D:
+ case GEN_OCL_READ_IMAGE_I_2D:
+ case GEN_OCL_READ_IMAGE_UI_2D:
+ case GEN_OCL_READ_IMAGE_F_2D:
case GEN_OCL_READ_IMAGE_I_3D:
case GEN_OCL_READ_IMAGE_UI_3D:
case GEN_OCL_READ_IMAGE_F_3D:
- case GEN_OCL_READ_IMAGE_I_I:
- case GEN_OCL_READ_IMAGE_UI_I:
- case GEN_OCL_READ_IMAGE_F_I:
+
+ case GEN_OCL_READ_IMAGE_I_1D_I:
+ case GEN_OCL_READ_IMAGE_UI_1D_I:
+ case GEN_OCL_READ_IMAGE_F_1D_I:
+ case GEN_OCL_READ_IMAGE_I_2D_I:
+ case GEN_OCL_READ_IMAGE_UI_2D_I:
+ case GEN_OCL_READ_IMAGE_F_2D_I:
case GEN_OCL_READ_IMAGE_I_3D_I:
case GEN_OCL_READ_IMAGE_UI_3D_I:
case GEN_OCL_READ_IMAGE_F_3D_I:
case GEN_OCL_WRITE_IMAGE_UI_3D:
case GEN_OCL_WRITE_IMAGE_F_3D:
CI++;
- case GEN_OCL_WRITE_IMAGE_I:
- case GEN_OCL_WRITE_IMAGE_UI:
- case GEN_OCL_WRITE_IMAGE_F:
+ case GEN_OCL_WRITE_IMAGE_I_2D:
+ case GEN_OCL_WRITE_IMAGE_UI_2D:
+ case GEN_OCL_WRITE_IMAGE_F_2D:
+ CI++;
+ case GEN_OCL_WRITE_IMAGE_I_1D:
+ case GEN_OCL_WRITE_IMAGE_UI_1D:
+ case GEN_OCL_WRITE_IMAGE_F_1D:
{
*CI = InsertToVector(call, *CI);
break;
// This is a transitional hack to bypass the LLVM 3.3 built-in types.
// See the Khronos SPIR specification for handling of these types.
#define __texture __attribute__((address_space(4)))
+struct _image1d_t;
+typedef __texture struct _image1d_t* __image1d_t;
struct _image2d_t;
typedef __texture struct _image2d_t* __image2d_t;
struct _image3d_t;
typedef __texture struct _image3d_t* __image3d_t;
typedef const ushort __sampler_t;
typedef size_t __event_t;
+#define image1d_t __image1d_t
#define image2d_t __image2d_t
#define image3d_t __image3d_t
#define sampler_t __sampler_t
// Image access functions
/////////////////////////////////////////////////////////////////////////////
+// 1D read
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
+OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
+OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, uint sampler_offset);
+OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, uint sampler_offset);
+
+// 2D read
OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset);
OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset);
+// 3D read
OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset);
OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset);
+// 1D write
+OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int4 color);
+OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, uint4 color);
+OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, float4 color);
+
+// 2D write
OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int4 color);
OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, uint4 color);
OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, float4 color);
+// 3D write
OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int w, int4 color);
OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, int w, uint4 color);
OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, int w, float4 color);
+
int __gen_ocl_get_image_width(uint surface_id);
int __gen_ocl_get_image_height(uint surface_id);
int __gen_ocl_get_image_channel_data_type(uint surface_id);
int __gen_ocl_get_image_channel_order(uint surface_id);
int __gen_ocl_get_image_depth(uint surface_id);
-#define GET_IMAGE(cl_image, surface_id) \
- uint surface_id = (uint)cl_image
-
+// 2D 3D Image Common Macro
#ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND
#define GEN_FIX_1 1
#else
#define GEN_FIX_1 0
#endif
+#define GET_IMAGE(cl_image, surface_id) \
+ uint surface_id = (uint)cl_image
+
#define DECL_READ_IMAGE0(int_clamping_fix, \
image_type, type, suffix, coord_type, n) \
INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, \
__gen_ocl_write_image ##suffix(EXPEND_WRITE_COORD(surface_id, coord, color));\
}
+
+// 1D
+#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix) \
+ DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int, 1) \
+ DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float, 1) \
+ DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int, 1) \
+ DECL_WRITE_IMAGE(image_type, type, suffix, int) \
+ DECL_WRITE_IMAGE(image_type, type, suffix, float)
+
+#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord
+#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord < 0 ? -1 : coord)
+#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * __gen_ocl_get_image_width(id);
+#define EXPEND_WRITE_COORD(id, coord, color) id, coord, color
+
+#define OUT_OF_BOX(coord, surface, normalized) \
+ (coord < 0 || \
+ ((normalized == 0) \
+ && (coord >= __gen_ocl_get_image_width(surface))) \
+ || ((normalized != 0) && (coord > 0x1p0)))
+
+// 1D variant: when tmpCoord lies in the open interval (-0x1p-20f, 0), shift it
+// by -0x1p-9f. Both constants carry the `f` suffix so the comparison and the
+// addition stay in single precision (an unsuffixed hex-float literal is a double).
+#define FIXUP_FLOAT_COORD(tmpCoord) \
+ { \
+ if (tmpCoord < 0 && tmpCoord > -0x1p-20f) \
+ tmpCoord += -0x1p-9f; \
+ }
+
+DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i)
+DECL_IMAGE(GEN_FIX_1, image1d_t, uint4, ui)
+DECL_IMAGE(0, image1d_t, float4, f)
+
+#undef EXPEND_READ_COORD
+#undef EXPEND_READ_COORD1
+#undef DENORMALIZE_COORD
+#undef EXPEND_WRITE_COORD
+#undef OUT_OF_BOX
+#undef FIXUP_FLOAT_COORD
+#undef DECL_IMAGE
+// End of 1D
+
+#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n) \
+ DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int ##n, n) \
+ DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float ##n, n) \
+ DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n, n) \
+ DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) \
+ DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
+// 2D
#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1
#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord.s0 < 0 ? -1 : coord.s0), \
(int)(coord.s1 < 0 ? -1 : coord.s1)
tmpCoord.s1 += -0x1p-9f; \
}
-#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n) \
- DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int ##n, n) \
- DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, float ##n, n) \
- DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n, n) \
- DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) \
- DECL_WRITE_IMAGE(image_type, type, suffix, float ## n)
-
DECL_IMAGE(GEN_FIX_1, image2d_t, int4, i, 2)
DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2)
DECL_IMAGE(0, image2d_t, float4, f, 2)
#undef OUT_OF_BOX
#undef FIXUP_FLOAT_COORD
+// 3D
#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2
#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int) (coord.s0 < 0 ? -1 : coord.s0), \
(int)(coord.s1 < 0 ? -1 : coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2)