#define OPENCV_CUDEV_UTIL_SATURATE_CAST_HPP
#include "../common.hpp"
-#include "opencv2/core/private.cuda.hpp"
+#if __CUDACC_VER_MAJOR__ >= 9
+#include <cuda_fp16.h>
+#endif
namespace cv { namespace cudev {
// cast_fp16 specialization for short -> float: the 16 bits stored in `v`
// are interpreted as a half-precision value, which is widened to float.
template <> __device__ __forceinline__ float cast_fp16<short, float>(short v)
{
-#if __CUDACC_VER_MAJOR__ >= 9
+#if __CUDACC_VER_MAJOR__ >= 9
// nvcc major version >= 9: view the storage of `v` as a __half object and
// convert that __half to float.
return float(*(__half*)&v);
#else
// Older nvcc: hand the 16-bit value straight to __half2float
// (presumably it takes the raw bit pattern on those toolkits -- TODO confirm).
return __half2float(v);
// cast_fp16 specialization for float -> short: converts `v` to half
// precision and returns the 16-bit pattern of the converted value.
template <> __device__ __forceinline__ short cast_fp16<float, short>(float v)
{
-#if __CUDACC_VER_MAJOR__ >= 9
+#if __CUDACC_VER_MAJOR__ >= 9
// nvcc major version >= 9: build a __half from the float input.
__half h(v);
// Reinterpret the storage of the converted half `h`. Reading from `&v` here
// would return 16 bits of the original float instead of the half encoding.
return *(short*)&h;
#else