/*! Remember that curbe register 'reg' lives at byte 'offset' inside the
 *  curbe payload. Plain map-insertion semantics: if 'reg' was already
 *  recorded, the existing offset is kept untouched. */
void Context::insertCurbeReg(ir::Register reg, uint32_t offset) {
  curbeRegs.emplace(reg, offset);
}
+ // Return the register that holds the base address of the surface bound at
+ // binding table index 'bti'. btiRegMap is filled in buildArgList(); the
+ // assert fires if no surface was ever registered for this bti (compiler bug).
+ ir::Register Context::getSurfaceBaseReg(unsigned char bti) {
+ map<unsigned char, ir::Register>::iterator iter;
+ iter = btiRegMap.find(bti);
+ GBE_ASSERT(iter != btiRegMap.end());
+ return iter->second;
+ }
void Context::buildArgList(void) {
kernel->argNum = fn.argNum();
kernel->args = GBE_NEW_ARRAY_NO_ARG(KernelArgument, kernel->argNum);
else
kernel->args = NULL;
+ btiRegMap.clear();
+ btiRegMap.insert(std::make_pair(1, ir::ocl::stackbuffer));
for (uint32_t argID = 0; argID < kernel->argNum; ++argID) {
const auto &arg = fn.getArg(argID);
case ir::FunctionArgument::GLOBAL_POINTER:
kernel->args[argID].type = GBE_ARG_GLOBAL_PTR;
kernel->args[argID].size = sizeof(void*);
+ kernel->args[argID].bti = arg.bti;
+ btiRegMap.insert(std::make_pair(arg.bti, arg.reg));
break;
case ir::FunctionArgument::CONSTANT_POINTER:
kernel->args[argID].type = GBE_ARG_CONSTANT_PTR;
void deallocateScratchMem(int32_t offset);
/*! Preallocated curbe register set including special registers. */
map<ir::Register, uint32_t> curbeRegs;
+ ir::Register getSurfaceBaseReg(unsigned char bti);
protected:
/*! Build the instruction stream. Return false if failed */
virtual bool emitCode(void) = 0;
set<ir::LabelIndex> usedLabels; //!< Set of all used labels
JIPMap JIPs; //!< Where to jump all labels/branches
uint32_t simdWidth; //!< Number of lanes per HW threads
+ map<unsigned char, ir::Register> btiRegMap;
GBE_CLASS(Context); //!< Use custom allocators
};
/*! Load instruction pattern */
DECL_PATTERN(LoadInstruction)
{
+ // Emit untyped DWORD reads that may span several binding table entries
+ // (mixed-pointer loads). Pass 0 reads straight into dst (the caller sets
+ // dst2[i] == dst[i] on entry); every later pass reads into freshly
+ // allocated temporaries and ADDs them into dst.
+ // NOTE(review): correctness appears to rely on all but one surface
+ // contributing zero after rebasing -- confirm against getRelativeAddress
+ // and the surface-base setup in buildArgList().
+ void readDWord(Selection::Opaque &sel,
+ vector<GenRegister> &dst,
+ vector<GenRegister> &dst2,
+ GenRegister addr,
+ uint32_t valueNum,
+ ir::AddressSpace space,
+ ir::BTI bti) const
+ {
+ for (uint32_t x = 0; x < bti.count; x++) {
+ if(x > 0)
+ // Later passes must not clobber dst: read into fresh temporaries.
+ for (uint32_t dstID = 0; dstID < valueNum; ++dstID)
+ dst2[dstID] = sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32);
+
+ GenRegister temp = getRelativeAddress(sel, addr, space, bti.bti[x]);
+ sel.UNTYPED_READ(temp, dst2.data(), valueNum, bti.bti[x]);
+ if(x > 0) {
+ sel.push();
+ // Scalar destination: accumulate with a single unmasked lane.
+ if(sel.isScalarReg(dst[0].reg())) {
+ sel.curr.noMask = 1;
+ sel.curr.execWidth = 1;
+ }
+ for (uint32_t y = 0; y < valueNum; y++)
+ sel.ADD(dst[y], dst[y], dst2[y]);
+ sel.pop();
+ }
+ }
+ }
+
+ // Select code for an untyped (DWORD-aligned) load. Destinations are
+ // retyped to U32 and the read is routed through readDWord so loads from
+ // mixed global pointers can sum the contribution of each candidate bti.
void emitUntypedRead(Selection::Opaque &sel,
const ir::LoadInstruction &insn,
GenRegister addr,
- uint32_t bti) const
+ ir::BTI bti) const
{
using namespace ir;
const uint32_t valueNum = insn.getValueNum();
vector<GenRegister> dst(valueNum);
+ vector<GenRegister> dst2(valueNum);
for (uint32_t dstID = 0; dstID < valueNum; ++dstID)
- dst[dstID] = GenRegister::retype(sel.selReg(insn.getValue(dstID)), GEN_TYPE_F);
- sel.UNTYPED_READ(addr, dst.data(), valueNum, bti);
+ // dst2 aliases dst here; readDWord relies on that for its first pass.
+ dst2[dstID] = dst[dstID] = sel.selReg(insn.getValue(dstID), TYPE_U32);
+ readDWord(sel, dst, dst2, addr, valueNum, insn.getAddressSpace(), bti);
}
void emitDWordGather(Selection::Opaque &sel,
const ir::LoadInstruction &insn,
GenRegister addr,
- uint32_t bti) const
+ ir::BTI bti) const
{
using namespace ir;
+ GBE_ASSERT(bti.count == 1);
const uint32_t simdWidth = sel.isScalarReg(insn.getValue(0)) ? 1 : sel.ctx.getSimdWidth();
GBE_ASSERT(insn.getValueNum() == 1);
GenRegister dst = sel.selReg(insn.getValue(0), ir::TYPE_U32);
sel.push();
sel.curr.noMask = 1;
- sel.SAMPLE(&dst, 1, &addr, 1, bti, 0, true, true);
+ sel.SAMPLE(&dst, 1, &addr, 1, bti.bti[0], 0, true, true);
sel.pop();
return;
}
sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(2));
sel.pop();
- sel.DWORD_GATHER(dst, addrDW, bti);
+ sel.DWORD_GATHER(dst, addrDW, bti.bti[0]);
}
+ // 64-bit (QWORD) load. Only the single-value, single-bti case is
+ // supported (see asserts); the address is first rebased to be
+ // surface-relative before the READ64 message is emitted.
void emitRead64(Selection::Opaque &sel,
const ir::LoadInstruction &insn,
GenRegister addr,
- uint32_t bti) const
+ ir::BTI bti) const
{
using namespace ir;
const uint32_t valueNum = insn.getValueNum();
/* XXX support scalar only right now. */
GBE_ASSERT(valueNum == 1);
-
+ GBE_ASSERT(bti.count == 1);
GenRegister dst[valueNum];
+ GenRegister tmpAddr = getRelativeAddress(sel, addr, insn.getAddressSpace(), bti.bti[0]);
for ( uint32_t dstID = 0; dstID < valueNum; ++dstID)
dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_U64);
- sel.READ64(addr, dst, valueNum, bti);
+ sel.READ64(tmpAddr, dst, valueNum, bti.bti[0]);
}
- void emitByteGather(Selection::Opaque &sel,
- const ir::LoadInstruction &insn,
+ void readByteAsDWord(Selection::Opaque &sel,
const uint32_t elemSize,
GenRegister address,
- uint32_t bti) const
+ GenRegister dst,
+ uint32_t simdWidth,
+ uint8_t bti) const
{
using namespace ir;
- const uint32_t valueNum = insn.getValueNum();
- const uint32_t simdWidth = sel.isScalarReg(insn.getValue(0)) ?
- 1 : sel.ctx.getSimdWidth();
- if(valueNum > 1) {
- vector<GenRegister> dst(valueNum);
- const uint32_t typeSize = getFamilySize(getFamily(insn.getValueType()));
-
- if(elemSize == GEN_BYTE_SCATTER_WORD) {
- for(uint32_t i = 0; i < valueNum; i++)
- dst[i] = sel.selReg(insn.getValue(i), ir::TYPE_U16);
- } else if(elemSize == GEN_BYTE_SCATTER_BYTE) {
- for(uint32_t i = 0; i < valueNum; i++)
- dst[i] = sel.selReg(insn.getValue(i), ir::TYPE_U8);
- }
-
- uint32_t tmpRegNum = typeSize*valueNum / 4;
- vector<GenRegister> tmp(tmpRegNum);
- for(uint32_t i = 0; i < tmpRegNum; i++) {
- tmp[i] = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
- }
-
- sel.UNTYPED_READ(address, tmp.data(), tmpRegNum, bti);
-
- for(uint32_t i = 0; i < tmpRegNum; i++) {
- sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, tmp[i], 4/typeSize);
- }
- } else {
- GBE_ASSERT(insn.getValueNum() == 1);
- const GenRegister value = sel.selReg(insn.getValue(0));
- GBE_ASSERT(elemSize == GEN_BYTE_SCATTER_WORD || elemSize == GEN_BYTE_SCATTER_BYTE);
-
Register tmpReg = sel.reg(FAMILY_DWORD, simdWidth == 1);
GenRegister tmpAddr = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
GenRegister tmpData = GenRegister::udxgrf(simdWidth, tmpReg);
sel.SHR(tmpData, tmpData, tmpAddr);
if (elemSize == GEN_BYTE_SCATTER_WORD)
- sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), sel.unpacked_uw(tmpReg));
+ sel.MOV(GenRegister::retype(dst, GEN_TYPE_UW), sel.unpacked_uw(tmpReg));
else if (elemSize == GEN_BYTE_SCATTER_BYTE)
- sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), sel.unpacked_ub(tmpReg));
+ sel.MOV(GenRegister::retype(dst, GEN_TYPE_UB), sel.unpacked_ub(tmpReg));
sel.pop();
+ }
+
+ // Byte/short gather. Multi-value loads are widened to DWORD reads
+ // (readDWord handles the multi-bti accumulation) and then unpacked back
+ // to the narrow type; single values go through readByteAsDWord once per
+ // bti, accumulating across btis the same way readDWord does.
+ void emitByteGather(Selection::Opaque &sel,
+ const ir::LoadInstruction &insn,
+ const uint32_t elemSize,
+ GenRegister address,
+ ir::BTI bti) const
+ {
+ using namespace ir;
+ const uint32_t valueNum = insn.getValueNum();
+ const uint32_t simdWidth = sel.isScalarReg(insn.getValue(0)) ?
+ 1 : sel.ctx.getSimdWidth();
+ RegisterFamily family = getFamily(insn.getValueType());
+
+ if(valueNum > 1) {
+ vector<GenRegister> dst(valueNum);
+ const uint32_t typeSize = getFamilySize(family);
+
+ for(uint32_t i = 0; i < valueNum; i++)
+ dst[i] = sel.selReg(insn.getValue(i), getType(family));
+
+ // Number of DWORD temporaries needed to hold all narrow values packed.
+ uint32_t tmpRegNum = typeSize*valueNum / 4;
+ vector<GenRegister> tmp(tmpRegNum);
+ vector<GenRegister> tmp2(tmpRegNum);
+ for(uint32_t i = 0; i < tmpRegNum; i++) {
+ tmp2[i] = tmp[i] = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
+ }
+
+ readDWord(sel, tmp, tmp2, address, tmpRegNum, insn.getAddressSpace(), bti);
+
+ // Unpack each DWORD back into 4/typeSize narrow destinations.
+ for(uint32_t i = 0; i < tmpRegNum; i++) {
+ sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, tmp[i], 4/typeSize);
+ }
+ } else {
+ GBE_ASSERT(insn.getValueNum() == 1);
+ const GenRegister value = sel.selReg(insn.getValue(0), insn.getValueType());
+ GBE_ASSERT(elemSize == GEN_BYTE_SCATTER_WORD || elemSize == GEN_BYTE_SCATTER_BYTE);
+ GenRegister tmp = value;
+
+ for (int x = 0; x < bti.count; x++) {
+ if (x > 0)
+ // Later btis must not clobber 'value': read into a fresh register.
+ tmp = sel.selReg(sel.reg(family, simdWidth == 1), insn.getValueType());
+
+ GenRegister addr = getRelativeAddress(sel, address, insn.getAddressSpace(), bti.bti[x]);
+ readByteAsDWord(sel, elemSize, addr, tmp, simdWidth, bti.bti[x]);
+ if (x > 0) {
+ sel.push();
+ if (simdWidth == 1) {
+ sel.curr.noMask = 1;
+ sel.curr.execWidth = 1;
+ }
+ sel.ADD(value, value, tmp);
+ sel.pop();
+ }
+ }
}
}
sel.INDIRECT_MOVE(dst, src);
}
+ // Rebase a flat address to be relative to the surface bound at 'bti' by
+ // subtracting that surface's base register. Local and constant addresses
+ // are already surface-relative, so they pass through unchanged.
+ INLINE GenRegister getRelativeAddress(Selection::Opaque &sel, GenRegister address, ir::AddressSpace space, uint8_t bti) const {
+ if(space == ir::MEM_LOCAL || space == ir::MEM_CONSTANT)
+ return address;
+
+ sel.push();
+ // Unmasked: the rebased address must be valid in every lane.
+ sel.curr.noMask = 1;
+ GenRegister temp = sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32);
+ sel.ADD(temp, address, GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(bti), ir::TYPE_U32)));
+ sel.pop();
+ return temp;
+ }
+
INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadInstruction &insn, bool &markChildren) const {
using namespace ir;
GenRegister address = sel.selReg(insn.getAddress(), ir::TYPE_U32);
sel.ADD(temp, address, sel.selReg(ocl::slmoffset, ir::TYPE_U32));
address = temp;
}
- if (insn.getAddressSpace() == MEM_CONSTANT) {
+ BTI bti;
+ if (space == MEM_CONSTANT || space == MEM_LOCAL) {
+ bti.bti[0] = space == MEM_CONSTANT ? BTI_CONSTANT : 0xfe;
+ bti.count = 1;
+ } else {
+ bti = insn.getBTI();
+ }
+ if (space == MEM_CONSTANT) {
// XXX TODO read 64bit constant through constant cache
// Per HW Spec, constant cache messages can read at least DWORD data.
// So, byte/short data type, we have to read through data cache.
if(insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
- this->emitRead64(sel, insn, address, 0x2);
+ this->emitRead64(sel, insn, address, bti);
else if(insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
- this->emitDWordGather(sel, insn, address, 0x2);
+ this->emitDWordGather(sel, insn, address, bti);
else {
- this->emitByteGather(sel, insn, elemSize, address, 0x2);
+ this->emitByteGather(sel, insn, elemSize, address, bti);
+ }
+ } else {
+ if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
+ this->emitRead64(sel, insn, address, bti);
+ else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
+ this->emitUntypedRead(sel, insn, address, bti);
+ else {
+ this->emitByteGather(sel, insn, elemSize, address, bti);
}
- }
- else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
- this->emitRead64(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
- else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
- this->emitUntypedRead(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
- else {
- this->emitByteGather(sel, insn, elemSize, address, space == MEM_LOCAL ? 0xfe : 0x01);
}
return true;
}
{
using namespace ir;
const AddressSpace space = insn.getAddressSpace();
- const uint32_t bti = space == MEM_LOCAL ? 0xfe : 0x01;
const Type type = insn.getValueType();
const uint32_t elemSize = getByteScatterGatherSize(type);
GenRegister address = sel.selReg(insn.getAddress(), ir::TYPE_U32);
sel.ADD(temp, address, sel.selReg(ocl::slmoffset, ir::TYPE_U32));
address = temp;
}
- if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
- this->emitWrite64(sel, insn, address, bti);
- else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
- this->emitUntypedWrite(sel, insn, address, bti);
- else {
- this->emitByteScatter(sel, insn, elemSize, address, bti);
+ if(space == MEM_LOCAL) {
+ if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
+ this->emitWrite64(sel, insn, address, 0xfe);
+ else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
+ this->emitUntypedWrite(sel, insn, address, 0xfe);
+ else
+ this->emitByteScatter(sel, insn, elemSize, address, 0xfe);
+ } else {
+ BTI bti = insn.getBTI();
+ for (int x = 0; x < bti.count; x++) {
+ GenRegister temp = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+ sel.push();
+ sel.curr.noMask = 1;
+ sel.ADD(temp, address, GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(bti.bti[x]), ir::TYPE_U32)));
+ sel.pop();
+ if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_QWORD)
+ this->emitWrite64(sel, insn, temp, bti.bti[x]);
+ else if (insn.isAligned() == true && elemSize == GEN_BYTE_SCATTER_DWORD)
+ this->emitUntypedWrite(sel, insn, temp, bti.bti[x]);
+ else {
+ this->emitByteScatter(sel, insn, elemSize, temp, bti.bti[x]);
+ }
+ }
}
return true;
}
using namespace ir;
const AtomicOps atomicOp = insn.getAtomicOpcode();
const AddressSpace space = insn.getAddressSpace();
- const uint32_t bti = space == MEM_LOCAL ? 0xfe : 0x01;
const uint32_t srcNum = insn.getSrcNum();
+
GenRegister src0 = sel.selReg(insn.getSrc(0), TYPE_U32); //address
GenRegister src1 = src0, src2 = src0;
if(srcNum > 1) src1 = sel.selReg(insn.getSrc(1), TYPE_U32);
if(srcNum > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32);
GenRegister dst = sel.selReg(insn.getDst(0), TYPE_U32);
GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp;
- if(space == MEM_LOCAL && sel.needPatchSLMAddr()){
- GenRegister temp = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
- sel.ADD(temp, src0, sel.selReg(ocl::slmoffset, ir::TYPE_U32));
- src0 = temp;
+ if(space == MEM_LOCAL) {
+ if (sel.needPatchSLMAddr()) {
+ GenRegister temp = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
+ sel.ADD(temp, src0, sel.selReg(ocl::slmoffset, ir::TYPE_U32));
+ src0 = temp;
+ }
+ sel.ATOMIC(dst, genAtomicOp, srcNum, src0, src1, src2, 0xfe);
+ } else {
+ ir::BTI b = insn.getBTI();
+ for (int x = 0; x < b.count; x++) {
+ sel.push();
+ sel.curr.noMask = 1;
+ GenRegister temp = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+ sel.ADD(temp, src0, GenRegister::negate(sel.selReg(sel.ctx.getSurfaceBaseReg(b.bti[x]), ir::TYPE_U32)));
+ sel.pop();
+ sel.ATOMIC(dst, genAtomicOp, srcNum, temp, src1, src2, b.bti[x]);
+ }
}
- sel.ATOMIC(dst, genAtomicOp, srcNum, src0, src1, src2, bti);
return true;
}
DECL_CTOR(AtomicInstruction, 1, 1);
OUT_UPDATE_SZ(arg.type);
OUT_UPDATE_SZ(arg.size);
OUT_UPDATE_SZ(arg.align);
- OUT_UPDATE_SZ(arg.bufSize);
+ OUT_UPDATE_SZ(arg.bti);
}
OUT_UPDATE_SZ(patches.size());
IN_UPDATE_SZ(arg.type);
IN_UPDATE_SZ(arg.size);
IN_UPDATE_SZ(arg.align);
- IN_UPDATE_SZ(arg.bufSize);
+ IN_UPDATE_SZ(arg.bti);
}
IN_UPDATE_SZ(patch_num);
outs << spaces_nl << " type value: "<< arg.type << "\n";
outs << spaces_nl << " size: "<< arg.size << "\n";
outs << spaces_nl << " align: "<< arg.align << "\n";
- outs << spaces_nl << " bufSize: "<< arg.bufSize << "\n";
+ outs << spaces_nl << " bti: "<< arg.bti << "\n";
}
outs << spaces_nl << " Patches Number is " << patches.size() << "\n";
return kernel->getArgSize(argID);
}
+ // C API shim: return the binding table index of kernel argument 'argID',
+ // or 0 when the kernel handle is NULL.
+ static uint8_t kernelGetArgBTI(gbe_kernel genKernel, uint32_t argID) {
+ if (genKernel == NULL) return 0u;
+ const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
+ return kernel->getArgBTI(argID);
+ }
+
static uint32_t kernelGetArgAlign(gbe_kernel genKernel, uint32_t argID) {
if (genKernel == NULL) return 0u;
const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
GBE_EXPORT_SYMBOL gbe_kernel_get_arg_num_cb *gbe_kernel_get_arg_num = NULL;
GBE_EXPORT_SYMBOL gbe_kernel_get_arg_info_cb *gbe_kernel_get_arg_info = NULL;
GBE_EXPORT_SYMBOL gbe_kernel_get_arg_size_cb *gbe_kernel_get_arg_size = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_get_arg_bti_cb *gbe_kernel_get_arg_bti = NULL;
GBE_EXPORT_SYMBOL gbe_kernel_get_arg_type_cb *gbe_kernel_get_arg_type = NULL;
GBE_EXPORT_SYMBOL gbe_kernel_get_arg_align_cb *gbe_kernel_get_arg_align = NULL;
GBE_EXPORT_SYMBOL gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width = NULL;
gbe_kernel_get_arg_num = gbe::kernelGetArgNum;
gbe_kernel_get_arg_info = gbe::kernelGetArgInfo;
gbe_kernel_get_arg_size = gbe::kernelGetArgSize;
+ gbe_kernel_get_arg_bti = gbe::kernelGetArgBTI;
gbe_kernel_get_arg_type = gbe::kernelGetArgType;
gbe_kernel_get_arg_align = gbe::kernelGetArgAlign;
gbe_kernel_get_simd_width = gbe::kernelGetSIMDWidth;
GBE_GET_ARG_INFO_INVALID = 0xffffffff
};
+// BTI magic number
+#define BTI_CONSTANT 0
+#define BTI_PRIVATE 1
+#define BTI_RESERVED_NUM 2
+
/*! Constant buffer values (ie values to setup in the constant buffer) */
enum gbe_curbe_type {
GBE_CURBE_LOCAL_ID_X = 0,
typedef uint32_t (gbe_kernel_get_arg_size_cb)(gbe_kernel, uint32_t argID);
extern gbe_kernel_get_arg_size_cb *gbe_kernel_get_arg_size;
+/*! Get the bti of a __global buffer */
+typedef uint8_t (gbe_kernel_get_arg_bti_cb)(gbe_kernel, uint32_t argID);
+extern gbe_kernel_get_arg_bti_cb *gbe_kernel_get_arg_bti;
+
/*! Get the type of the given argument */
typedef enum gbe_arg_type (gbe_kernel_get_arg_type_cb)(gbe_kernel, uint32_t argID);
extern gbe_kernel_get_arg_type_cb *gbe_kernel_get_arg_type;
gbe_arg_type type; //!< Pointer, structure, image, regular value?
uint32_t size; //!< Size of the argument
uint32_t align; //!< addr alignment of the argument
- uint32_t bufSize; //!< Contant buffer size
+ uint8_t bti; //!< binding table index for __global buffer
ir::FunctionArgument::InfoFromLLVM info;
};
INLINE uint32_t getArgSize(uint32_t argID) const {
return argID >= argNum ? 0u : args[argID].size;
}
+ /*! Return the bti for __global buffer */
+ INLINE uint8_t getArgBTI(uint32_t argID) const {
+ return argID >= argNum ? 0u : args[argID].bti;
+ }
+ /*! Return the alignment of buffer argument */
INLINE uint32_t getArgAlign(uint32_t argID) const {
return argID >= argNum ? 0u : args[argID].align;
}
gbe_kernel_get_name = gbe::kernelGetName;
gbe_kernel_get_arg_type = gbe::kernelGetArgType;
gbe_kernel_get_arg_size = gbe::kernelGetArgSize;
+ gbe_kernel_get_arg_bti = gbe::kernelGetArgBTI;
gbe_kernel_get_simd_width = gbe::kernelGetSIMDWidth;
gbe_kernel_get_scratch_size = gbe::kernelGetScratchSize;
gbe_kernel_use_slm = gbe::kernelUseSLM;
}
void Context::input(const std::string &name, FunctionArgument::Type type, Register reg,
- FunctionArgument::InfoFromLLVM& info, uint32_t elementSize, uint32_t align) {
+ FunctionArgument::InfoFromLLVM& info, uint32_t elementSize, uint32_t align, unsigned char bti) {
GBE_ASSERTM(fn != NULL, "No function currently defined");
GBE_ASSERTM(reg < fn->file.regNum(), "Out-of-bound register");
- FunctionArgument *arg = GBE_NEW(FunctionArgument, type, reg, elementSize, name, align, info);
+ FunctionArgument *arg = GBE_NEW(FunctionArgument, type, reg, elementSize, name, align, info, bti);
fn->args.push_back(arg);
}
LabelIndex label(void);
/*! Append a new input register for the function */
void input(const std::string &name, FunctionArgument::Type type, Register reg,
- FunctionArgument::InfoFromLLVM& info, uint32_t elemSz = 0u, uint32_t align = 0);
+ FunctionArgument::InfoFromLLVM& info, uint32_t elemSz = 0u, uint32_t align = 0, uint8_t bti = 0);
/*! Append a new output register for the function */
void output(Register reg);
/*! Get the immediate value */
/*! LOAD with the destinations directly specified */
template <typename... Args>
- void LOAD(Type type, Register offset, AddressSpace space, bool dwAligned, Args...values)
+ void LOAD(Type type, Register offset, AddressSpace space, bool dwAligned, BTI bti, Args...values)
{
const Tuple index = this->tuple(values...);
const uint16_t valueNum = std::tuple_size<std::tuple<Args...>>::value;
GBE_ASSERT(valueNum > 0);
- this->LOAD(type, index, offset, space, valueNum, dwAligned);
+ this->LOAD(type, index, offset, space, valueNum, dwAligned, bti);
}
/*! STORE with the sources directly specified */
template <typename... Args>
- void STORE(Type type, Register offset, AddressSpace space, bool dwAligned, Args...values)
+ void STORE(Type type, Register offset, AddressSpace space, bool dwAligned, BTI bti, Args...values)
{
const Tuple index = this->tuple(values...);
const uint16_t valueNum = std::tuple_size<std::tuple<Args...>>::value;
GBE_ASSERT(valueNum > 0);
- this->STORE(type, index, offset, space, valueNum, dwAligned);
+ this->STORE(type, index, offset, space, valueNum, dwAligned, bti);
}
protected:
};
/*! Create a function input argument */
- INLINE FunctionArgument(Type type, Register reg, uint32_t size, const std::string &name, uint32_t align, InfoFromLLVM& info) :
- type(type), reg(reg), size(size), align(align), name(name), info(info) { }
+ INLINE FunctionArgument(Type type, Register reg, uint32_t size, const std::string &name, uint32_t align, InfoFromLLVM& info, uint8_t bti) :
+ type(type), reg(reg), size(size), align(align), name(name), info(info), bti(bti) { }
Type type; //!< Gives the type of argument we have
Register reg; //!< Holds the argument
uint32_t align; //!< address alignment for the argument
const std::string name; //!< Holds the function name for IR output
InfoFromLLVM info; //!< Holds the llvm passed info
+ uint8_t bti; //!< binding table index
GBE_STRUCT(FunctionArgument); // Use custom allocator
};
}
void ImageSet::getData(struct ImageInfo *imageInfos) const {
+ int id = 0;
for(auto &it : regMap)
- imageInfos[it.second->idx - gbe_get_image_base_index()] = *it.second;
+ imageInfos[id++] = *it.second;
}
ImageSet::~ImageSet() {
IN_UPDATE_SZ(img_info->channelOrderSlot);
IN_UPDATE_SZ(img_info->dimOrderSlot);
- indexMap.insert(std::make_pair(index, img_info));
+ indexMap.insert(std::make_pair(img_info->idx, img_info));
}
IN_UPDATE_SZ(magic);
return reg;
}
- void ImageSet::append(Register imageReg, Context *ctx)
+ void ImageSet::append(Register imageReg, Context *ctx, uint8_t bti)
{
ir::FunctionArgument *arg = ctx->getFunction().getArg(imageReg);
GBE_ASSERTM(arg && arg->type == ir::FunctionArgument::IMAGE, "Append an invalid reg to image set.");
int32_t id = ctx->getFunction().getArgID(arg);
struct ImageInfo *imageInfo = GBE_NEW(struct ImageInfo);
imageInfo->arg_idx = id;
- imageInfo->idx = regMap.size() + gbe_get_image_base_index();
+ imageInfo->idx = bti;
imageInfo->wSlot = -1;
imageInfo->hSlot = -1;
imageInfo->depthSlot = -1;
{
public:
/*! Append an image argument. */
- void append(Register imageReg, Context *ctx);
+ void append(Register imageReg, Context *ctx, uint8_t bti);
/*! Append an image info slot. */
void appendInfo(ImageInfoKey key, uint32_t offset);
/*! Append an image info register. */
AtomicInstruction(AtomicOps atomicOp,
Register dst,
AddressSpace addrSpace,
+ BTI bti,
Tuple src)
{
this->opcode = OP_ATOMIC;
this->dst[0] = dst;
this->src = src;
this->addrSpace = addrSpace;
+ this->bti = bti;
srcNum = 2;
if((atomicOp == ATOMIC_OP_INC) ||
(atomicOp == ATOMIC_OP_DEC))
srcNum = 3;
}
INLINE AddressSpace getAddressSpace(void) const { return this->addrSpace; }
+ INLINE BTI getBTI(void) const { return bti; }
INLINE AtomicOps getAtomicOpcode(void) const { return this->atomicOp; }
INLINE bool wellFormed(const Function &fn, std::string &whyNot) const;
INLINE void out(std::ostream &out, const Function &fn) const;
Register dst[1];
Tuple src;
AddressSpace addrSpace; //!< Address space
+ BTI bti; //!< bti
uint8_t srcNum:2; //!<Source Number
AtomicOps atomicOp:6; //!< Atomic operation code
};
Register offset,
AddressSpace addrSpace,
uint32_t valueNum,
- bool dwAligned)
+ bool dwAligned,
+ BTI bti)
{
GBE_ASSERT(valueNum < 128);
this->opcode = OP_LOAD;
this->addrSpace = addrSpace;
this->valueNum = valueNum;
this->dwAligned = dwAligned ? 1 : 0;
+ this->bti = bti;
}
INLINE Register getDst(const Function &fn, uint32_t ID) const {
GBE_ASSERTM(ID < valueNum, "Out-of-bound source register");
INLINE Type getValueType(void) const { return type; }
INLINE uint32_t getValueNum(void) const { return valueNum; }
INLINE AddressSpace getAddressSpace(void) const { return addrSpace; }
+ INLINE BTI getBTI(void) const { return bti; }
INLINE bool wellFormed(const Function &fn, std::string &why) const;
INLINE void out(std::ostream &out, const Function &fn) const;
INLINE bool isAligned(void) const { return !!dwAligned; }
Register offset; //!< Alias to make it similar to store
Tuple values; //!< Values to load
AddressSpace addrSpace; //!< Where to load
+ BTI bti;
uint8_t valueNum:7; //!< Number of values to load
uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN
};
Register offset,
AddressSpace addrSpace,
uint32_t valueNum,
- bool dwAligned)
+ bool dwAligned,
+ BTI bti)
{
GBE_ASSERT(valueNum < 255);
this->opcode = OP_STORE;
this->addrSpace = addrSpace;
this->valueNum = valueNum;
this->dwAligned = dwAligned ? 1 : 0;
+ this->bti = bti;
}
INLINE Register getSrc(const Function &fn, uint32_t ID) const {
GBE_ASSERTM(ID < valueNum + 1u, "Out-of-bound source register for store");
INLINE uint32_t getValueNum(void) const { return valueNum; }
INLINE Type getValueType(void) const { return type; }
INLINE AddressSpace getAddressSpace(void) const { return addrSpace; }
+ INLINE BTI getBTI(void) const { return bti; }
INLINE bool wellFormed(const Function &fn, std::string &why) const;
INLINE void out(std::ostream &out, const Function &fn) const;
INLINE bool isAligned(void) const { return !!dwAligned; }
Register offset; //!< First source is the offset where to store
Tuple values; //!< Values to store
AddressSpace addrSpace; //!< Where to store
+ BTI bti; //!< Which btis need access
uint8_t valueNum:7; //!< Number of values to store
uint8_t dwAligned:1; //!< DWORD aligned is what matters with GEN
Register dst[0]; //!< No destination
out << " {" << "%" << this->getSrc(fn, 0) << "}";
for (uint32_t i = 1; i < srcNum; ++i)
out << " %" << this->getSrc(fn, i);
+ out << " bti";
+ for (uint32_t i = 0; i < bti.count; ++i)
+ out << ": " << (int)bti.bti[i];
}
out << "%" << this->getDst(fn, i) << (i != (valueNum-1u) ? " " : "");
out << "}";
out << " %" << this->getSrc(fn, 0);
+ out << " bti";
+ for (uint32_t i = 0; i < bti.count; ++i)
+ out << ": " << (int)bti.bti[i];
}
INLINE void StoreInstruction::out(std::ostream &out, const Function &fn) const {
for (uint32_t i = 0; i < valueNum; ++i)
out << "%" << this->getSrc(fn, i+1) << (i != (valueNum-1u) ? " " : "");
out << "}";
+ out << " bti";
+ for (uint32_t i = 0; i < bti.count; ++i)
+ out << ": " << (int)bti.bti[i];
}
INLINE void LabelInstruction::out(std::ostream &out, const Function &fn) const {
return HelperIntrospection<CLASS, RefClass>::value == 1;
#define START_INTROSPECTION(CLASS) \
- static_assert(sizeof(internal::CLASS) == sizeof(uint64_t), \
+ static_assert(sizeof(internal::CLASS) == (sizeof(uint64_t)*2), \
"Bad instruction size"); \
static_assert(offsetof(internal::CLASS, opcode) == 0, \
"Bad opcode offset"); \
DECL_MEM_FN(ConvertInstruction, Type, getSrcType(void), getSrcType())
DECL_MEM_FN(ConvertInstruction, Type, getDstType(void), getDstType())
DECL_MEM_FN(AtomicInstruction, AddressSpace, getAddressSpace(void), getAddressSpace())
+DECL_MEM_FN(AtomicInstruction, BTI, getBTI(void), getBTI())
DECL_MEM_FN(AtomicInstruction, AtomicOps, getAtomicOpcode(void), getAtomicOpcode())
DECL_MEM_FN(StoreInstruction, Type, getValueType(void), getValueType())
DECL_MEM_FN(StoreInstruction, uint32_t, getValueNum(void), getValueNum())
DECL_MEM_FN(StoreInstruction, AddressSpace, getAddressSpace(void), getAddressSpace())
+DECL_MEM_FN(StoreInstruction, BTI, getBTI(void), getBTI())
DECL_MEM_FN(StoreInstruction, bool, isAligned(void), isAligned())
DECL_MEM_FN(LoadInstruction, Type, getValueType(void), getValueType())
DECL_MEM_FN(LoadInstruction, uint32_t, getValueNum(void), getValueNum())
DECL_MEM_FN(LoadInstruction, AddressSpace, getAddressSpace(void), getAddressSpace())
+DECL_MEM_FN(LoadInstruction, BTI, getBTI(void), getBTI())
DECL_MEM_FN(LoadInstruction, bool, isAligned(void), isAligned())
DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType())
DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
}
// For all unary functions with given opcode
- Instruction ATOMIC(AtomicOps atomicOp, Register dst, AddressSpace space, Tuple src) {
- return internal::AtomicInstruction(atomicOp, dst, space, src).convert();
+ Instruction ATOMIC(AtomicOps atomicOp, Register dst, AddressSpace space, BTI bti, Tuple src) {
+ return internal::AtomicInstruction(atomicOp, dst, space, bti, src).convert();
}
// BRA
Register offset, \
AddressSpace space, \
uint32_t valueNum, \
- bool dwAligned) \
+ bool dwAligned, \
+ BTI bti) \
{ \
- return internal::CLASS(type,tuple,offset,space,valueNum,dwAligned).convert(); \
+ return internal::CLASS(type,tuple,offset,space,valueNum,dwAligned,bti).convert(); \
}
DECL_EMIT_FUNCTION(LOAD, LoadInstruction)
#include "sys/intrusive_list.hpp"
#include <ostream>
+#define MAX_MIXED_POINTER 4
namespace gbe {
namespace ir {
+ /*! Set of binding table indices a load/store/atomic may touch
+ (mixed-pointer support); at most MAX_MIXED_POINTER entries. */
+ struct BTI {
+ uint8_t bti[MAX_MIXED_POINTER]; //!< candidate binding table indices
+ uint8_t count; //!< number of valid entries in bti[]
+ };
/*! All opcodes */
enum Opcode : uint8_t {
///////////////////////////////////////////////////////////////////////////
/*! Stores instruction internal data and opcode */
- class ALIGNED(sizeof(uint64_t)) InstructionBase
+ class ALIGNED(sizeof(uint64_t)*2) InstructionBase
{
public:
/*! Initialize the instruction from a 8 bytes stream */
/*! Get the instruction opcode */
INLINE Opcode getOpcode(void) const { return opcode; }
protected:
- enum { opaqueSize = sizeof(uint64_t)-sizeof(uint8_t) };
+ enum { opaqueSize = sizeof(uint64_t)*2-sizeof(uint8_t) };
Opcode opcode; //!< Idendifies the instruction
char opaque[opaqueSize]; //!< Remainder of it
GBE_CLASS(InstructionBase); //!< Use internal allocators
static const uint32_t addressIndex = 0;
/*! Address space that is manipulated here */
AddressSpace getAddressSpace(void) const;
+ BTI getBTI(void) const;
/*! Return the atomic function code */
AtomicOps getAtomicOpcode(void) const;
/*! Return the register that contains the addresses */
Type getValueType(void) const;
/*! Give the number of values the instruction is storing (srcNum-1) */
uint32_t getValueNum(void) const;
+ BTI getBTI(void) const;
/*! Address space that is manipulated here */
AddressSpace getAddressSpace(void) const;
/*! DWORD aligned means untyped read for Gen. That is what matters */
bool isAligned(void) const;
/*! Return the register that contains the addresses */
INLINE Register getAddress(void) const { return this->getSrc(0u); }
+ BTI getBTI(void) const;
/*! Return the register that contain value valueID */
INLINE Register getValue(uint32_t valueID) const {
return this->getDst(valueID);
/*! F32TO16.{dstType <- srcType} dst src */
Instruction F32TO16(Type dstType, Type srcType, Register dst, Register src);
/*! atomic dst addr.space {src1 {src2}} */
- Instruction ATOMIC(AtomicOps opcode, Register dst, AddressSpace space, Tuple src);
+ Instruction ATOMIC(AtomicOps opcode, Register dst, AddressSpace space, BTI bti, Tuple src);
/*! bra labelIndex */
Instruction BRA(LabelIndex labelIndex);
/*! (pred) bra labelIndex */
/*! ret */
Instruction RET(void);
/*! load.type.space {dst1,...,dst_valueNum} offset value */
- Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned);
+ Instruction LOAD(Type type, Tuple dst, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, BTI bti);
/*! store.type.space offset {src1,...,src_valueNum} value */
- Instruction STORE(Type type, Tuple src, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned);
+ Instruction STORE(Type type, Tuple src, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned, BTI bti);
/*! loadi.type dst value */
Instruction LOADI(Type type, Register dst, ImmediateIndex value);
/*! sync.params... (see Sync instruction) */
#include "ir/value.hpp"
#include "sys/set.hpp"
#include "sys/cvar.hpp"
+#include "backend/program.h"
/* Not defined for LLVM 3.0 */
#if !defined(LLVM_VERSION_MAJOR)
* compare instructions we need to invert to decrease branch complexity
*/
set<const Value*> conditionSet;
+ map<const Value*, int> globalPointer;
/*!
* <phi,phiCopy> node information for later optimization
*/
LoopInfo *LI;
const Module *TheModule;
-
+ int btiBase;
public:
static char ID;
explicit GenWriter(ir::Unit &unit)
ctx(unit),
regTranslator(ctx),
LI(0),
- TheModule(0)
+ TheModule(0),
+ btiBase(BTI_RESERVED_NUM)
{
initializeLoopInfoPass(*PassRegistry::getPassRegistry());
pass = PASS_EMIT_REGISTERS;
LI = &getAnalysis<LoopInfo>();
emitFunction(F);
phiMap.clear();
+ globalPointer.clear();
+ // Reset for next function
+ btiBase = BTI_RESERVED_NUM;
return false;
}
void visitInsertValueInst(InsertValueInst &I) {NOT_SUPPORTED;}
void visitExtractValueInst(ExtractValueInst &I) {NOT_SUPPORTED;}
template <bool isLoad, typename T> void visitLoadOrStore(T &I);
+
+ INLINE void gatherBTI(Value *pointer, ir::BTI &bti);
// batch vec4/8/16 load/store
INLINE void emitBatchLoadOrStore(const ir::Type type, const uint32_t elemNum,
Value *llvmValue, const ir::Register ptr,
- const ir::AddressSpace addrSpace, Type * elemType, bool isLoad);
+ const ir::AddressSpace addrSpace, Type * elemType, bool isLoad, ir::BTI bti);
void visitInstruction(Instruction &I) {NOT_SUPPORTED;}
};
const uint32_t elemSize = getTypeByteSize(unit, elemType);
const uint32_t elemNum = vectorType->getNumElements();
//vector's elemType always scalar type
- ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo, elemNum*elemSize, getAlignmentByte(unit, type));
+ ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo, elemNum*elemSize, getAlignmentByte(unit, type), 0);
ir::Function& fn = ctx.getFunction();
for(uint32_t i=1; i < elemNum; i++) {
"vector type in the function argument is not supported yet");
const ir::Register reg = getRegister(I);
if (type->isPointerTy() == false)
- ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo, getTypeByteSize(unit, type), getAlignmentByte(unit, type));
+ ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo, getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0);
else {
PointerType *pointerType = dyn_cast<PointerType>(type);
Type *pointed = pointerType->getElementType();
if (I->hasByValAttr()) {
#endif /* LLVM_VERSION_MINOR <= 1 */
const size_t structSize = getTypeByteSize(unit, pointed);
- ctx.input(argName, ir::FunctionArgument::STRUCTURE, reg, llvmInfo, structSize, getAlignmentByte(unit, type));
+ ctx.input(argName, ir::FunctionArgument::STRUCTURE, reg, llvmInfo, structSize, getAlignmentByte(unit, type), 0);
}
// Regular user provided pointer (global, local or constant)
else {
const uint32_t align = getAlignmentByte(unit, pointed);
switch (addrSpace) {
case ir::MEM_GLOBAL:
- ctx.input(argName, ir::FunctionArgument::GLOBAL_POINTER, reg, llvmInfo, ptrSize, align);
+ globalPointer.insert(std::make_pair(I, btiBase));
+ ctx.input(argName, ir::FunctionArgument::GLOBAL_POINTER, reg, llvmInfo, ptrSize, align, btiBase);
+ btiBase++;
break;
case ir::MEM_LOCAL:
- ctx.input(argName, ir::FunctionArgument::LOCAL_POINTER, reg, llvmInfo, ptrSize, align);
+ ctx.input(argName, ir::FunctionArgument::LOCAL_POINTER, reg, llvmInfo, ptrSize, align, 0xfe);
ctx.getFunction().setUseSLM(true);
break;
case ir::MEM_CONSTANT:
- ctx.input(argName, ir::FunctionArgument::CONSTANT_POINTER, reg, llvmInfo, ptrSize, align);
+ ctx.input(argName, ir::FunctionArgument::CONSTANT_POINTER, reg, llvmInfo, ptrSize, align, 0x2);
break;
case ir::IMAGE:
- ctx.input(argName, ir::FunctionArgument::IMAGE, reg, llvmInfo, ptrSize, align);
- ctx.getFunction().getImageSet()->append(reg, &ctx);
+ ctx.input(argName, ir::FunctionArgument::IMAGE, reg, llvmInfo, ptrSize, align, 0x0);
+ ctx.getFunction().getImageSet()->append(reg, &ctx, btiBase++);
break;
default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
}
const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace);
const ir::Register dst = this->getRegister(&I);
+ ir::BTI bti;
+ gatherBTI(*AI, bti);
vector<ir::Register> src;
uint32_t srcNum = 0;
while(AI != AE) {
srcNum++;
}
const ir::Tuple srcTuple = ctx.arrayTuple(&src[0], srcNum);
- ctx.ATOMIC(opcode, dst, addrSpace, srcTuple);
+ ctx.ATOMIC(opcode, dst, addrSpace, bti, srcTuple);
}
/* append a new sampler. should be called before any reference to
void GenWriter::emitBatchLoadOrStore(const ir::Type type, const uint32_t elemNum,
Value *llvmValues, const ir::Register ptr,
const ir::AddressSpace addrSpace,
- Type * elemType, bool isLoad) {
+ Type * elemType, bool isLoad, ir::BTI bti) {
const ir::RegisterFamily pointerFamily = ctx.getPointerFamily();
uint32_t totalSize = elemNum * getFamilySize(getFamily(type));
uint32_t msgNum = totalSize > 16 ? totalSize / 16 : 1;
// Emit the instruction
if (isLoad)
- ctx.LOAD(type, tuple, addr, addrSpace, perMsgNum, true);
+ ctx.LOAD(type, tuple, addr, addrSpace, perMsgNum, true, bti);
else
- ctx.STORE(type, tuple, addr, addrSpace, perMsgNum, true);
+ ctx.STORE(type, tuple, addr, addrSpace, perMsgNum, true, bti);
}
}
+ // The idea behind this is to walk the use-def chain and find every
+ // possible source of the pointer. Later codegen can then emit the
+ // read/write messages against all of the BTIs gathered here.
+ void GenWriter::gatherBTI(Value *pointer, ir::BTI &bti) {
+ typedef map<const Value*, int>::iterator GlobalPtrIter;
+ Value *p;
+ size_t idx = 0;
+ int nBTI = 0;
+ std::vector<Value*> candidates;
+ candidates.push_back(pointer);
+ std::set<Value*> processed;
+ bool needNewBTI = true;
+
+ while (idx < candidates.size()) {
+ bool isPrivate = false;
+ p = candidates[idx];
+
+ while (dyn_cast<User>(p)) {
+
+ if (processed.find(p) == processed.end()) {
+ processed.insert(p);
+ } else {
+ // This use-def chain falls into a loop,
+ // it does not introduce a new buffer source.
+ needNewBTI = false;
+ break;
+ }
+
+ if (dyn_cast<SelectInst>(p)) {
+ SelectInst *sel = cast<SelectInst>(p);
+ p = sel->getTrueValue();
+ candidates.push_back(sel->getFalseValue());
+ continue;
+ }
+
+ if (dyn_cast<PHINode>(p)) {
+ PHINode* phi = cast<PHINode>(p);
+ int n = phi->getNumIncomingValues();
+ for (int j = 1; j < n; j++)
+ candidates.push_back(phi->getIncomingValue(j));
+ p = phi->getIncomingValue(0);
+ continue;
+ }
+
+ if (dyn_cast<AllocaInst>(p)) {
+ isPrivate = true;
+ break;
+ }
+ p = cast<User>(p)->getOperand(0);
+ }
+
+ if (needNewBTI == false) {
+ // go to next possible pointer source
+ idx++; continue;
+ }
+
+ uint8_t new_bti = 0;
+ if (isPrivate) {
+ new_bti = BTI_PRIVATE;
+ } else {
+ if(isa<Argument>(p) && dyn_cast<Argument>(p)->hasByValAttr()) {
+ // structure value implementation is not complete now,
+ // they are now treated as push constant, so, the load/store
+ // here is not as meaningful.
+ bti.bti[0] = BTI_PRIVATE;
+ bti.count = 1;
+ break;
+ }
+ Type *ty = p->getType();
+ if(ty->getPointerAddressSpace() == 3) {
+ // __local memory
+ new_bti = 0xfe;
+ } else {
+ // __global memory
+ GlobalPtrIter iter = globalPointer.find(p);
+ GBE_ASSERT(iter != globalPointer.end());
+ new_bti = iter->second;
+ }
+ }
+ // avoid duplicate
+ bool bFound = false;
+ for (int j = 0; j < nBTI; j++) {
+ if (bti.bti[j] == new_bti) {
+ bFound = true; break;
+ }
+ }
+ if (bFound == false) {
+ bti.bti[nBTI++] = new_bti;
+ bti.count = nBTI;
+ }
+ idx++;
+ }
+ GBE_ASSERT(bti.count <= MAX_MIXED_POINTER);
+ }
+
extern int OCL_SIMD_WIDTH;
template <bool isLoad, typename T>
INLINE void GenWriter::emitLoadOrStore(T &I)
const bool dwAligned = (I.getAlignment() % 4) == 0;
const ir::AddressSpace addrSpace = addressSpaceLLVMToGen(llvmSpace);
const ir::Register ptr = this->getRegister(llvmPtr);
-
+ ir::BTI binding;
+ if(addrSpace == ir::MEM_GLOBAL || addrSpace == ir::MEM_PRIVATE) {
+ gatherBTI(llvmPtr, binding);
+ }
// Scalar is easy. We neednot build register tuples
if (isScalarType(llvmType) == true) {
const ir::Type type = getType(ctx, llvmType);
const ir::Register values = this->getRegister(llvmValues);
if (isLoad)
- ctx.LOAD(type, ptr, addrSpace, dwAligned, values);
+ ctx.LOAD(type, ptr, addrSpace, dwAligned, binding, values);
else
- ctx.STORE(type, ptr, addrSpace, dwAligned, values);
+ ctx.STORE(type, ptr, addrSpace, dwAligned, binding, values);
}
// A vector type requires to build a tuple
else {
// Emit the instruction
if (isLoad)
- ctx.LOAD(type, tuple, ptr, addrSpace, elemNum, dwAligned);
+ ctx.LOAD(type, tuple, ptr, addrSpace, elemNum, dwAligned, binding);
else
- ctx.STORE(type, tuple, ptr, addrSpace, elemNum, dwAligned);
+ ctx.STORE(type, tuple, ptr, addrSpace, elemNum, dwAligned, binding);
}
// Not supported by the hardware. So, we split the message and we use
// strided loads and stores
else {
- emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, elemType, isLoad);
+ emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, elemType, isLoad, binding);
}
}
else if((dataFamily==ir::FAMILY_WORD && elemNum%2==0) || (dataFamily == ir::FAMILY_BYTE && elemNum%4 == 0)) {
- emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, elemType, isLoad);
+ emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, elemType, isLoad, binding);
} else {
for (uint32_t elemID = 0; elemID < elemNum; elemID++) {
if(regTranslator.isUndefConst(llvmValues, elemID))
ctx.ADD(ir::TYPE_S32, addr, ptr, offset);
}
if (isLoad)
- ctx.LOAD(type, addr, addrSpace, dwAligned, reg);
+ ctx.LOAD(type, addr, addrSpace, dwAligned, binding, reg);
else
- ctx.STORE(type, addr, addrSpace, dwAligned, reg);
+ ctx.STORE(type, addr, addrSpace, dwAligned, binding, reg);
}
}
}
--- /dev/null
+
/* Work items 0, 1 and 4 read their element through src1, every other
 * work item reads through src2 — forces the compiler to track a pointer
 * with two possible buffer sources. */
kernel void compiler_mixed_pointer(__global uint* src1, __global uint *src2, __global uint *dst) {
  int x = get_global_id(0);
  global uint * tmp;

  if (x == 0 || x == 1 || x == 4)
    tmp = src1;
  else
    tmp = src2;
  dst[x] = tmp[x];
}
+
/* The first five work items store through dst1, the rest through dst2 —
 * a mixed destination pointer coming from a select. */
kernel void compiler_mixed_pointer1(__global uint* src, __global uint *dst1, __global uint *dst2) {
  int x = get_global_id(0);
  global uint * tmp;
  if (x < 5)
    tmp = dst1;
  else
    tmp = dst2;
  tmp[x] = src[x];
}
offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i);
if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) {
struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)k->args[i].mem;
- cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, buffer->sub_offset, cl_gpgpu_get_cache_ctrl());
+ cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
} else {
- cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, 0, cl_gpgpu_get_cache_ctrl());
+ cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, 0, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
}
}
if(raw_size == 0)
return 0;
- cl_buffer bo = cl_gpgpu_alloc_constant_buffer(gpgpu, aligned_size);
+ cl_buffer bo = cl_gpgpu_alloc_constant_buffer(gpgpu, aligned_size, BTI_CONSTANT);
if (bo == NULL)
return -1;
cl_buffer_map(bo, 1);
*/
if(cl_driver_get_ver(ctx->drv) == 75)
stack_sz *= 4;
-
- cl_gpgpu_set_stack(gpgpu, offset, stack_sz, cl_gpgpu_get_cache_ctrl());
+ cl_gpgpu_set_stack(gpgpu, offset, stack_sz, BTI_PRIVATE);
}
static int
extern cl_gpgpu_sync_cb *cl_gpgpu_sync;
/* Bind a regular unformatted buffer */
-typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t internal_offset, uint32_t cchint);
+typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t internal_offset, uint32_t size, uint8_t bti);
extern cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf;
/* bind samplers defined in both kernel and kernel args. */
typedef int (cl_gpgpu_upload_curbes_cb)(cl_gpgpu, const void* data, uint32_t size);
extern cl_gpgpu_upload_curbes_cb *cl_gpgpu_upload_curbes;
-typedef cl_buffer (cl_gpgpu_alloc_constant_buffer_cb)(cl_gpgpu, uint32_t size);
+typedef cl_buffer (cl_gpgpu_alloc_constant_buffer_cb)(cl_gpgpu, uint32_t size, uint8_t bti);
extern cl_gpgpu_alloc_constant_buffer_cb *cl_gpgpu_alloc_constant_buffer;
/* Setup all indirect states */
gbe_kernel_get_code_size_cb *interp_kernel_get_code_size = NULL;
gbe_kernel_get_arg_num_cb *interp_kernel_get_arg_num = NULL;
gbe_kernel_get_arg_size_cb *interp_kernel_get_arg_size = NULL;
+gbe_kernel_get_arg_bti_cb *interp_kernel_get_arg_bti = NULL;
gbe_kernel_get_arg_type_cb *interp_kernel_get_arg_type = NULL;
gbe_kernel_get_arg_align_cb *interp_kernel_get_arg_align = NULL;
gbe_kernel_get_simd_width_cb *interp_kernel_get_simd_width = NULL;
if (interp_kernel_get_arg_size == NULL)
return false;
+ interp_kernel_get_arg_bti = *(gbe_kernel_get_arg_bti_cb**)dlsym(dlhInterp, "gbe_kernel_get_arg_bti");
+ if (interp_kernel_get_arg_bti == NULL)
+ return false;
+
interp_kernel_get_arg_type = *(gbe_kernel_get_arg_type_cb**)dlsym(dlhInterp, "gbe_kernel_get_arg_type");
if (interp_kernel_get_arg_type == NULL)
return false;
extern gbe_kernel_get_code_size_cb *interp_kernel_get_code_size;
extern gbe_kernel_get_arg_num_cb *interp_kernel_get_arg_num;
extern gbe_kernel_get_arg_size_cb *interp_kernel_get_arg_size;
+extern gbe_kernel_get_arg_bti_cb *interp_kernel_get_arg_bti;
extern gbe_kernel_get_arg_type_cb *interp_kernel_get_arg_type;
extern gbe_kernel_get_arg_align_cb *interp_kernel_get_arg_align;
extern gbe_kernel_get_simd_width_cb *interp_kernel_get_simd_width;
k->args[index].mem = mem;
k->args[index].is_set = 1;
k->args[index].local_sz = 0;
-
+ k->args[index].bti = interp_kernel_get_arg_bti(k->opaque, index);
return CL_SUCCESS;
}
typedef struct cl_argument {
cl_mem mem; /* For image and regular buffers */
cl_sampler sampler; /* For sampler. */
+ unsigned char bti; /* Binding table index for this argument's surface */
uint32_t local_sz:31; /* For __local size specification */
uint32_t is_set:1; /* All args must be set before NDRange */
} cl_argument;
unsigned long img_bitmap; /* image usage bitmap. */
unsigned int img_index_base; /* base index for image surface.*/
- drm_intel_bo *binded_img[max_img_n + 128]; /* all images binded for the call */
unsigned long sampler_bitmap; /* sampler usage bitmap. */
}
static dri_bo*
-intel_gpgpu_alloc_constant_buffer_gen7(intel_gpgpu_t *gpgpu, uint32_t size)
+intel_gpgpu_alloc_constant_buffer_gen7(intel_gpgpu_t *gpgpu, uint32_t size, uint8_t bti)
{
uint32_t s = size - 1;
assert(size != 0);
surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[2];
+ gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[bti];
memset(ss2, 0, sizeof(gen7_surface_state_t));
ss2->ss0.surface_type = I965_SURFACE_BUFFER;
ss2->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_UINT;
ss2->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
ss2->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */
ss2->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
- heap->binding_table[2] = offsetof(surface_heap_t, surface) + 2* sizeof(gen7_surface_state_t);
+ heap->binding_table[bti] = offsetof(surface_heap_t, surface) + bti* sizeof(gen7_surface_state_t);
if(gpgpu->constant_b.bo)
dri_bo_unreference(gpgpu->constant_b.bo);
I915_GEM_DOMAIN_RENDER,
0,
gpgpu->aux_offset.surface_heap_offset +
- heap->binding_table[2] +
+ heap->binding_table[bti] +
offsetof(gen7_surface_state_t, ss1),
gpgpu->constant_b.bo);
return gpgpu->constant_b.bo;
}
static dri_bo*
-intel_gpgpu_alloc_constant_buffer_gen75(intel_gpgpu_t *gpgpu, uint32_t size)
+intel_gpgpu_alloc_constant_buffer_gen75(intel_gpgpu_t *gpgpu, uint32_t size, uint8_t bti)
{
uint32_t s = size - 1;
assert(size != 0);
surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[2];
+ gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[bti];
memset(ss2, 0, sizeof(gen7_surface_state_t));
ss2->ss0.surface_type = I965_SURFACE_BUFFER;
ss2->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_UINT;
ss2->ss7.shader_g = I965_SURCHAN_SELECT_GREEN;
ss2->ss7.shader_b = I965_SURCHAN_SELECT_BLUE;
ss2->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA;
- heap->binding_table[2] = offsetof(surface_heap_t, surface) + 2* sizeof(gen7_surface_state_t);
+ heap->binding_table[bti] = offsetof(surface_heap_t, surface) + bti* sizeof(gen7_surface_state_t);
if(gpgpu->constant_b.bo)
dri_bo_unreference(gpgpu->constant_b.bo);
I915_GEM_DOMAIN_RENDER,
0,
gpgpu->aux_offset.surface_heap_offset +
- heap->binding_table[2] +
+ heap->binding_table[bti] +
offsetof(gen7_surface_state_t, ss1),
gpgpu->constant_b.bo);
return gpgpu->constant_b.bo;
}
-
-/* Map address space with two 2GB surfaces. One surface for untyped message and
- * one surface for byte scatters / gathers. Actually the HW does not require two
- * surfaces but Fulsim complains
- */
static void
-intel_gpgpu_map_address_space(intel_gpgpu_t *gpgpu)
+/* Program a RAW buffer surface state at binding table slot "index",
+ * sized to "size" bytes and based at buf + internal_offset. */
+intel_gpgpu_setup_bti(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t internal_offset, uint32_t size, unsigned char index)
{
+ uint32_t s = size - 1;
surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
- gen7_surface_state_t *ss0 = (gen7_surface_state_t *) heap->surface[0];
- gen7_surface_state_t *ss1 = (gen7_surface_state_t *) heap->surface[1];
+ gen7_surface_state_t *ss0 = (gen7_surface_state_t *) heap->surface[index];
memset(ss0, 0, sizeof(gen7_surface_state_t));
- memset(ss1, 0, sizeof(gen7_surface_state_t));
- ss1->ss0.surface_type = ss0->ss0.surface_type = I965_SURFACE_BUFFER;
- ss1->ss0.surface_format = ss0->ss0.surface_format = I965_SURFACEFORMAT_RAW;
- ss1->ss2.width = ss0->ss2.width = 127; /* bits 6:0 of sz */
- ss1->ss2.height = ss0->ss2.height = 16383; /* bits 20:7 of sz */
- ss0->ss3.depth = 1023; /* bits 30:21 of sz */
- ss1->ss3.depth = 1023; /* bits 30:21 of sz */
- ss1->ss5.cache_control = ss0->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
- heap->binding_table[0] = offsetof(surface_heap_t, surface);
- heap->binding_table[1] = sizeof(gen7_surface_state_t) + offsetof(surface_heap_t, surface);
+ ss0->ss0.surface_type = I965_SURFACE_BUFFER;
+ ss0->ss0.surface_format = I965_SURFACEFORMAT_RAW;
+ /* Buffer length is encoded as (size - 1) split over width/height/depth.
+ * NOTE(review): size == 0 would wrap s to 0xffffffff; callers appear to
+ * guarantee size != 0 — confirm. */
+ ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */
+ ss0->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
+ ss0->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */
+ ss0->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
+ heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * sizeof(gen7_surface_state_t);
+
+ /* Record the presumed GPU address now, and emit a relocation so the
+ * kernel driver patches ss1 if the buffer object moves. */
+ ss0->ss1.base_addr = buf->offset + internal_offset;
+ dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+ I915_GEM_DOMAIN_RENDER,
+ I915_GEM_DOMAIN_RENDER,
+ internal_offset,
+ gpgpu->aux_offset.surface_heap_offset +
+ heap->binding_table[index] +
+ offsetof(gen7_surface_state_t, ss1),
+ buf);
}
+
static int
intel_is_surface_array(cl_mem_object_type type)
{
}
ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
- gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo;
assert(index < GEN_MAX_SURFACES);
}
surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
memset(ss, 0, sizeof(*ss));
-
ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
ss->ss0.surface_type = I965_SURFACE_2D;
}
ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
intel_gpgpu_set_buf_reloc_gen7(gpgpu, index, obj_bo, obj_bo_offset);
- gpgpu->binded_img[index - gpgpu->img_index_base] = obj_bo;
assert(index < GEN_MAX_SURFACES);
}
static void
intel_gpgpu_bind_buf(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t offset,
- uint32_t internal_offset, uint32_t cchint)
+ uint32_t internal_offset, uint32_t size, uint8_t bti)
{
assert(gpgpu->binded_n < max_buf_n);
gpgpu->binded_buf[gpgpu->binded_n] = buf;
gpgpu->target_buf_offset[gpgpu->binded_n] = internal_offset;
gpgpu->binded_offset[gpgpu->binded_n] = offset;
gpgpu->binded_n++;
+ intel_gpgpu_setup_bti(gpgpu, buf, internal_offset, size, bti);
}
static int
return 0;
}
static void
-intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint32_t cchint)
+intel_gpgpu_set_stack(intel_gpgpu_t *gpgpu, uint32_t offset, uint32_t size, uint8_t bti)
{
drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr;
gpgpu->stack_b.bo = drm_intel_bo_alloc(bufmgr, "STACK", size, 64);
- intel_gpgpu_bind_buf(gpgpu, gpgpu->stack_b.bo, offset, 0, cchint);
+
+ intel_gpgpu_bind_buf(gpgpu, gpgpu->stack_b.bo, offset, 0, size, bti);
}
static void
{
gpgpu->ker = kernel;
intel_gpgpu_build_idrt(gpgpu, kernel);
- intel_gpgpu_map_address_space(gpgpu);
dri_bo_unmap(gpgpu->aux_buf.bo);
}
}
memset(bo->virtual, 0, size);
drm_intel_bo_unmap(bo);
-
- intel_gpgpu_bind_buf(gpgpu, bo, offset, 0, 0);
+ intel_gpgpu_bind_buf(gpgpu, bo, offset, 0, size, 0);
return 0;
}
compiler_insert_to_constant.cpp
compiler_argument_structure.cpp
compiler_arith_shift_right.cpp
+ compiler_mixed_pointer.cpp
compiler_array0.cpp
compiler_array.cpp
compiler_array1.cpp
--- /dev/null
+#include "utest_helper.hpp"
+
// CPU reference for the compiler_mixed_pointer kernel: work items 0, 1
// and 4 read their element from src1, every other work item from src2.
static void cpu(int global_id, int *src1, int *src2, int *dst) {
  const bool use_first = (global_id == 0) || (global_id == 1) || (global_id == 4);
  int *picked = use_first ? src1 : src2;
  dst[global_id] = picked[global_id];
}
// CPU reference for the compiler_mixed_pointer1 kernel: the first five
// work items copy their element into dst1, the remaining ones into dst2.
static void cpu1(int global_id, int *src, int *dst1, int *dst2) {
  if (global_id < 5)
    dst1[global_id] = src[global_id];
  else
    dst2[global_id] = src[global_id];
}
+
+// Utest: verify a load through a pointer with two possible buffer
+// sources (switch between src1/src2) matches the CPU reference in cpu().
+void compiler_mixed_pointer(void)
+{
+ const size_t n = 16;
+ int cpu_dst[16], cpu_src[16], cpu_src1[16];
+
+ // Setup kernel and buffers: two source buffers and one destination.
+ OCL_CREATE_KERNEL("compiler_mixed_pointer");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+ globals[0] = 16;
+ locals[0] = 16;
+
+ // Single pass: fill inputs, run GPU and CPU reference, then compare.
+ for (uint32_t pass = 0; pass < 1; ++pass) {
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ // Mirror the GPU input buffers into the CPU reference arrays.
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ cpu_src[i] = ((int32_t*)buf_data[0])[i] = i;
+ cpu_src1[i] = ((int32_t*)buf_data[1])[i] = 65536-i;
+ }
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Run the reference implementation on CPU
+ for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_src1, cpu_dst);
+
+ // Compare GPU output against the CPU reference
+ OCL_MAP_BUFFER(2);
+ for (size_t i = 0; i < n; ++i) {
+// printf(" %d %d\n", cpu_dst[i], ((int32_t*)buf_data[2])[i]);
+ OCL_ASSERT(((int32_t*)buf_data[2])[i] == cpu_dst[i]);
+ }
+ OCL_UNMAP_BUFFER(2);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_mixed_pointer);
+
+// Utest: verify a store through a pointer with two possible buffer
+// destinations (select between dst1/dst2) matches the CPU reference cpu1().
+void compiler_mixed_pointer1(void)
+{
+ const size_t n = 16;
+ int cpu_dst1[16], cpu_dst2[16], cpu_src[16];
+
+ // Setup kernel and buffers: one source buffer, two destinations.
+ OCL_CREATE_KERNEL_FROM_FILE("compiler_mixed_pointer", "compiler_mixed_pointer1");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint32_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+ globals[0] = 16;
+ locals[0] = 16;
+
+ // Single pass: fill inputs, run GPU and CPU reference, then compare.
+ for (uint32_t pass = 0; pass < 1; ++pass) {
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ OCL_MAP_BUFFER(2);
+ // Initialize both destinations to 0xff so untouched slots are visible.
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ cpu_src[i] = ((int32_t*)buf_data[0])[i] = i;
+ cpu_dst1[i] = ((int32_t*)buf_data[1])[i] = 0xff;
+ cpu_dst2[i] = ((int32_t*)buf_data[2])[i] = 0xff;
+ }
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_UNMAP_BUFFER(2);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Run the reference implementation on CPU
+ for (int32_t i = 0; i <(int32_t) n; ++i) cpu1(i, cpu_src, cpu_dst1, cpu_dst2);
+
+ // Compare both GPU destinations against the CPU reference
+ OCL_MAP_BUFFER(1);
+ OCL_MAP_BUFFER(2);
+ for (size_t i = 0; i < n; ++i) {
+// printf(" %d %d\n", cpu_dst1[i], ((int32_t*)buf_data[1])[i]);
+// printf(" %d %d\n", ((int32_t*)buf_data[2])[i], cpu_dst2[i]);
+ OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst1[i]);
+ OCL_ASSERT(((int32_t*)buf_data[2])[i] == cpu_dst2[i]);
+ }
+ OCL_UNMAP_BUFFER(1);
+ OCL_UNMAP_BUFFER(2);
+ }
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_mixed_pointer1);