From a2ac260524b89ad062e485ec2db9424a4d6afd2c Mon Sep 17 00:00:00 2001 From: Johannes M Dieterich Date: Mon, 8 Apr 2019 09:44:08 -0700 Subject: [PATCH] ifdef guard some explicit pragma unrolls (#19018) Summary: The ROCm compiler cannot and will not satisfy these explicit `#pragma unroll` requests because the affected loops have a runtime trip count, which causes compile-time warnings. Some warnings remain that arise from other parts of the ROCm stack; tickets have been filed and they will be resolved within those components. Pull Request resolved: https://github.com/pytorch/pytorch/pull/19018 Differential Revision: D14832859 Pulled By: ezyang fbshipit-source-id: 0d66e4aebe4e56af14dd5e2967d3c374a82be25c --- aten/src/THC/THCSortUtils.cuh | 2 ++ aten/src/THCUNN/SpatialDepthwiseConvolution.cu | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/aten/src/THC/THCSortUtils.cuh b/aten/src/THC/THCSortUtils.cuh index 4980ee1..7d39b3b 100644 --- a/aten/src/THC/THCSortUtils.cuh +++ b/aten/src/THC/THCSortUtils.cuh @@ -66,7 +66,9 @@ __device__ inline void bitonicSort(K keys[Power2SortSize], for (unsigned int size = 2; size < Power2SortSize; size *= 2) { bool flag = ((threadIdx.x & (size / 2)) != 0); +#ifndef __HIP_PLATFORM_HCC__ #pragma unroll +#endif for (unsigned int stride = size / 2; stride > 0; stride /= 2) { __syncthreads(); diff --git a/aten/src/THCUNN/SpatialDepthwiseConvolution.cu b/aten/src/THCUNN/SpatialDepthwiseConvolution.cu index baf7610..2ee0417 100644 --- a/aten/src/THCUNN/SpatialDepthwiseConvolution.cu +++ b/aten/src/THCUNN/SpatialDepthwiseConvolution.cu @@ -76,7 +76,9 @@ __global__ void spatialDepthwiseConvolutionUpdateOutput( AccT value = biasEnabled ? 
ScalarConvert::to(bias.data()[c]) : ScalarConvert::to(0); const IndexType offset0 = (n * inputChannels + inputChannel) * inputHeight * inputWidth; +#ifndef __HIP_PLATFORM_HCC__ #pragma unroll +#endif for (int kH = 0; kH < KH_LIMIT; ++kH) { #ifndef __HIP_PLATFORM_HCC__ #pragma unroll @@ -136,7 +138,9 @@ __global__ void spatialDepthwiseConvolutionUpdateGradInput( AccT value = ScalarConvert::to(0); +#ifndef __HIP_PLATFORM_HCC__ #pragma unroll +#endif for (int multiplier = 0; multiplier < depthwiseMultiplier; ++multiplier) { int och = (c * depthwiseMultiplier) + multiplier; int weightOffset = och * kernelHeight * kernelWidth; -- 2.7.4