From 991e475042d5878e39125e9b8e50e89c952147d4 Mon Sep 17 00:00:00 2001 From: Guo Yejun Date: Tue, 18 Feb 2014 05:30:27 +0800 Subject: [PATCH] GBE: add param to switch the behavior of math func Add OCL_STRICT_CONFORMANCE to switch the behavior of the math functions. If it is 1, the functions are high precision at some performance cost; if it is 0, a fast path with good-enough precision is selected. This change adds the code basis, with 'sin' and 'cos' implemented as examples; support for other math functions will be added later. Signed-off-by: Guo Yejun --- backend/src/backend/program.cpp | 12 ++++++++++++ backend/src/builtin_vector_proto.def | 4 ++++ backend/src/ocl_stdlib.tmpl.h | 8 ++++++++ 3 files changed, 24 insertions(+) diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index 2492a8b..98fcded 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -459,12 +459,20 @@ namespace gbe { /*********************** End of Program class member function *************************/ +#define REDEF_MATH_FUNC(x) "#ifdef "#x"\n#undef "#x"\n#endif\n#define "#x" __gen_ocl_internal_fastpath_"#x"\n" + std::string ocl_mathfunc_fastpath_str = + REDEF_MATH_FUNC(sin) + REDEF_MATH_FUNC(cos) + "\n" + ; + static void programDelete(gbe_program gbeProgram) { gbe::Program *program = (gbe::Program*)(gbeProgram); GBE_SAFE_DELETE(program); } BVAR(OCL_OUTPUT_BUILD_LOG, false); + BVAR(OCL_STRICT_CONFORMANCE, true); SVAR(OCL_PCH_PATH, PCH_OBJECT_DIR); SVAR(OCL_PCM_PATH, PCM_OBJECT_DIR); @@ -760,6 +768,10 @@ namespace gbe { } else fwrite(ocl_stdlib_str.c_str(), strlen(ocl_stdlib_str.c_str()), 1, clFile); + if (!OCL_STRICT_CONFORMANCE) { + fwrite(ocl_mathfunc_fastpath_str.c_str(), strlen(ocl_mathfunc_fastpath_str.c_str()), 1, clFile); + } + // Write the source to the cl file fwrite(source, strlen(source), 1, clFile); fclose(clFile); diff --git a/backend/src/builtin_vector_proto.def b/backend/src/builtin_vector_proto.def index 4393ad5..7bc7c48 100644 --- 
a/backend/src/builtin_vector_proto.def +++ b/backend/src/builtin_vector_proto.def @@ -129,6 +129,10 @@ gentype tanpi (gentype x) gentype tgamma (gentype) gentype trunc (gentype) +##math function fast path +gentype __gen_ocl_internal_fastpath_sin (gentype) +gentype __gen_ocl_internal_fastpath_cos (gentype) + ##half_native_math #gentype half_cos (gentype x) #gentype half_divide (gentype x, gentype y) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index d2cc144..224cfc6 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -4460,6 +4460,14 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image) { return __gen_ocl_get_image_array_size(image); } #endif +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin(float x) { + return native_sin(x); +} + +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos(float x) { + return native_cos(x); +} + #pragma OPENCL EXTENSION cl_khr_fp64 : disable #undef DECL_IMAGE -- 2.7.4