According to the OpenCL spec, the size of a 3-component vector (vec3) is aligned to that of a 4-component vector (vec4).
For memory loads/stores, clang already aligns vec3 to vec4.
If we do not calculate private/local memory sizes using the vec4 size,
out-of-range memory accesses will occur.
This fixes the failure of the OpenCV 3.0 test case:
OCL_Arithm/MeanStdDev.Mat_Mask
v2:
vec3 constant data should also be aligned to vec4.
Signed-off-by: Ruiling Song <ruiling.song@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
case Type::TypeID::VectorTyID:
{
const ConstantDataSequential *cds = dyn_cast<ConstantDataSequential>(c);
+ const VectorType *vecTy = cast<VectorType>(type);
GBE_ASSERT(cds);
getSequentialData(cds, mem, offset);
+ if(vecTy->getNumElements() == 3) // OCL spec require align to vec4
+ offset += getTypeByteSize(unit, vecTy->getElementType());
break;
}
case Type::TypeID::IntegerTyID:
case Type::VectorTyID:
{
const VectorType* VecTy = cast<VectorType>(Ty);
- return VecTy->getNumElements() * getTypeBitSize(unit, VecTy->getElementType());
+ uint32_t numElem = VecTy->getNumElements();
+ if(numElem == 3) numElem = 4; // OCL spec
+ return numElem * getTypeBitSize(unit, VecTy->getElementType());
}
case Type::ArrayTyID:
{