From d526a2b6e8554dea75e356a8ded225c5419e3860 Mon Sep 17 00:00:00 2001 From: Jan Vesely Date: Tue, 6 Feb 2018 18:44:43 +0000 Subject: [PATCH] Add vstore_half_rtz implementation Passes CTS on carrizo Reviewer: Jeroen Ketema Signed-off-by: Jan Vesely llvm-svn: 324373 --- libclc/generic/include/clc/shared/vstore.h | 2 ++ libclc/generic/lib/shared/vstore.cl | 35 +++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/libclc/generic/include/clc/shared/vstore.h b/libclc/generic/include/clc/shared/vstore.h index e479e33..549e6bf 100644 --- a/libclc/generic/include/clc/shared/vstore.h +++ b/libclc/generic/include/clc/shared/vstore.h @@ -37,10 +37,12 @@ _CLC_VECTOR_VSTORE_PRIM1(ulong) _CLC_VECTOR_VSTORE_PRIM1(float) _CLC_VECTOR_VSTORE_HALF_PRIM1(float,) +_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtz) #ifdef cl_khr_fp64 _CLC_VECTOR_VSTORE_PRIM1(double) _CLC_VECTOR_VSTORE_HALF_PRIM1(double,) + _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtz) #endif #ifdef cl_khr_fp16 diff --git a/libclc/generic/lib/shared/vstore.cl b/libclc/generic/lib/shared/vstore.cl index bafd76d..cbddd59 100644 --- a/libclc/generic/lib/shared/vstore.cl +++ b/libclc/generic/lib/shared/vstore.cl @@ -108,15 +108,48 @@ _CLC_DEF _CLC_OVERLOAD float __clc_noop(float x) { return x; } +_CLC_DEF _CLC_OVERLOAD float __clc_rtz(float x) +{ + /* Round toward zero: clear the low 13 of the 23 float mantissa bits so only the top 10 (the half-precision mantissa width) remain and the magnitude can only shrink */ + int mask = 0xffffe000; + const int exp = (as_uint(x) >> 23 & 0xff) - 127; + /* Results that are denormal in half precision (unbiased exponent below -14) cannot be flushed and keep fewer mantissa bits: clear one more low bit per exponent step below -14, at most 10 more */ + if (exp < -14) + mask <<= min(-(exp + 14), 10); + /* RTZ does not produce Inf for large numbers: finite magnitudes above 65504 are clamped to 65504 with the sign of x */ + if (fabs(x) > 65504.0f && !isinf(x)) + return copysign(65504.0f, x); + /* NaN corner case: return NaN unchanged instead of masking its bits */ + if (isnan(x)) + return x; + return as_float(as_uint(x) & mask); +} #ifdef cl_khr_fp64 _CLC_DEF _CLC_OVERLOAD double __clc_noop(double x) { return x; } +_CLC_DEF _CLC_OVERLOAD double __clc_rtz(double x) +{ + /* Round toward zero: clear the low 42 of the 52 double mantissa bits so only the top 10
(the half-precision mantissa width) remain and the magnitude can only shrink */ + ulong mask = 0xfffffc0000000000UL; + const int exp = (as_ulong(x) >> 52 & 0x7ff) - 1023; + /* Results that are denormal in half precision (unbiased exponent below -14) cannot be flushed and keep fewer mantissa bits: clear one more low bit per exponent step below -14, at most 10 more */ + if (exp < -14) + mask <<= min(-(exp + 14), 10); + /* RTZ does not produce Inf for large numbers: finite magnitudes above 65504 are clamped to 65504 with the sign of x */ + if (fabs(x) > 65504.0 && !isinf(x)) + return copysign(65504.0, x); + /* NaN corner case: return NaN unchanged instead of masking its bits */ + if (isnan(x)) + return x; + return as_double(as_ulong(x) & mask); +} #endif #define __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \ - __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_noop) + __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_noop) \ + __FUNC(SUFFIX ## _rtz, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtz) #define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \ __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) -- 2.7.4