GBE: Fix type size for vector3
author Ruiling Song <ruiling.song@intel.com>
Mon, 11 Aug 2014 05:48:49 +0000 (13:48 +0800)
committer Yang Rong <rong.r.yang@intel.com>
Tue, 12 Aug 2014 06:34:55 +0000 (14:34 +0800)
According to the OCL spec, the size of a vector3 is aligned to that of a vector4.
And for memory load/store, clang already aligns it to vector4.
If we do not calculate the private/local memory size as vector4,
out-of-range memory accesses will occur.

This fixes the failure of the OpenCV 3.0 test case:
OCL_Arithm/MeanStdDev.Mat_Mask

v2:
  vec3 constant data should be aligned to vec4.

Signed-off-by: Ruiling Song <ruiling.song@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
backend/src/llvm/llvm_gen_backend.cpp
backend/src/llvm/llvm_passes.cpp

index d931659..240a1d2 100644 (file)
@@ -693,8 +693,11 @@ namespace gbe
       case Type::TypeID::VectorTyID:
         {
           const ConstantDataSequential *cds = dyn_cast<ConstantDataSequential>(c);
+          const VectorType *vecTy = cast<VectorType>(type);
           GBE_ASSERT(cds);
           getSequentialData(cds, mem, offset);
+          if(vecTy->getNumElements() == 3) // OCL spec require align to vec4
+            offset += getTypeByteSize(unit, vecTy->getElementType());
           break;
         }
       case Type::TypeID::IntegerTyID:
index b8ab844..1a38a0c 100644 (file)
@@ -181,7 +181,9 @@ namespace gbe
       case Type::VectorTyID:
       {
         const VectorType* VecTy = cast<VectorType>(Ty);
-        return VecTy->getNumElements() * getTypeBitSize(unit, VecTy->getElementType());
+        uint32_t numElem = VecTy->getNumElements();
+        if(numElem == 3) numElem = 4; // OCL spec
+        return numElem * getTypeBitSize(unit, VecTy->getElementType());
       }
       case Type::ArrayTyID:
       {