From 9c2f56daae96f51e39d4f504d84e5ef59bea77b5 Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Fri, 31 May 2013 10:09:56 +0800 Subject: [PATCH] GBE: Fixed a 3 elements vector load/store bug. Per OpenCL spec, for 3-component vector data types, the size of the data type is 4 * sizeof(component). And the LLVM front end really does cast type3 data to type4 data for load/store instructions, which breaks our implementation. We need to fix it up to the actual element count. Signed-off-by: Zhigang Gong Reviewed-by: Yang Rong --- backend/src/llvm/llvm_gen_backend.cpp | 17 +++++++++++++++-- kernels/compiler_vector_load_store.cl | 10 +++++----- utests/compiler_vector_load_store.cpp | 2 +- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 5189db3..a9c726b 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -368,6 +368,13 @@ namespace gbe const auto key = std::make_pair(value, index); return scalarMap.find(key) != scalarMap.end(); } + /*! If it's an undef const value, return true. Otherwise, return false. */ + bool isUndefConst(Value *value, uint32_t index) { + getRealValue(value, index); + + Constant *CPV = dyn_cast<Constant>(value); + return (CPV && (isa<UndefValue>(CPV))); + } private: /*! This creates a scalar register for a Value (index is the vector index when * the value is a vector of scalars) @@ -2158,10 +2165,16 @@ namespace gbe Type *elemType = vectorType->getElementType(); // We follow OCL spec and support 2,3,4,8,16 elements only - const uint32_t elemNum = vectorType->getNumElements(); + uint32_t elemNum = vectorType->getNumElements(); GBE_ASSERTM(elemNum == 2 || elemNum == 3 || elemNum == 4 || elemNum == 8 || elemNum == 16, "Only vectors of 2,3,4,8 or 16 elements are supported"); - + // Per OpenCL 1.2 spec 6.1.5: + // For 3-component vector data types, the size of the data type is 4 * sizeof(component).
+ // And LLVM does cast type3 data to type4 for load/store instructions, + // so a 4-element vector may have only 3 valid elements. We need to fix it up to the correct element + // count here. + if (elemNum == 4 && regTranslator.isUndefConst(llvmValues, 3)) + elemNum = 3; // The code is going to be fairly different from types to types (based on // size of each vector element) const ir::Type type = getType(ctx, elemType); diff --git a/kernels/compiler_vector_load_store.cl b/kernels/compiler_vector_load_store.cl index b362412..30f0e1e 100644 --- a/kernels/compiler_vector_load_store.cl +++ b/kernels/compiler_vector_load_store.cl @@ -18,12 +18,12 @@ __kernel void test_##type ##n(__global type *pin, \ } #define TEST_ALL_TYPE(n) \ - TEST_TYPE(char,n) \ + TEST_TYPE(char,n) \ TEST_TYPE(uchar,n) \ TEST_TYPE(short,n) \ - TEST_TYPE(ushort,n) \ - TEST_TYPE(int,n) \ - TEST_TYPE(uint,n) \ + TEST_TYPE(ushort,n)\ + TEST_TYPE(int,n) \ + TEST_TYPE(uint,n) \ TEST_TYPE(float,n) #if 0 @@ -34,7 +34,7 @@ __kernel void test_##type ##n(__global type *pin, \ #endif TEST_ALL_TYPE(2) -//TEST_ALL_TYPE(3) +TEST_ALL_TYPE(3) TEST_ALL_TYPE(4) TEST_ALL_TYPE(8) TEST_ALL_TYPE(16) diff --git a/utests/compiler_vector_load_store.cpp b/utests/compiler_vector_load_store.cpp index 76c12a1..79f284f 100644 --- a/utests/compiler_vector_load_store.cpp +++ b/utests/compiler_vector_load_store.cpp @@ -42,7 +42,7 @@ MAKE_UTEST_FROM_FUNCTION(compiler_vector_ ## kernel_type ##n ##_load_store); #define test_all_vector(type, kernel_type) \ compiler_vector_load_store(type, 2, kernel_type) \ - /*compiler_vector_load_store(type, 3, kernel_type)*/ \ + compiler_vector_load_store(type, 3, kernel_type) \ compiler_vector_load_store(type, 4, kernel_type) \ compiler_vector_load_store(type, 8, kernel_type) \ compiler_vector_load_store(type, 16, kernel_type) -- 2.7.4