Summary:
The ROCm compiler cannot and will not satisfy the `#pragma unroll` hints on these loops, causing compile-time warnings.
The reason is that the loop trip count is only known at runtime.
Some warnings remain that arise from other parts of the ROCm stack; tickets have been filed, and they will be resolved within those components.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/19018
Differential Revision:
D14832859
Pulled By: ezyang
fbshipit-source-id:
0d66e4aebe4e56af14dd5e2967d3c374a82be25c
for (unsigned int size = 2; size < Power2SortSize; size *= 2) {
bool flag = ((threadIdx.x & (size / 2)) != 0);
+#ifndef __HIP_PLATFORM_HCC__
#pragma unroll
+#endif
for (unsigned int stride = size / 2; stride > 0; stride /= 2) {
__syncthreads();
AccT value = biasEnabled ? ScalarConvert<T, AccT>::to(bias.data()[c]) : ScalarConvert<int, AccT>::to(0);
const IndexType offset0 = (n * inputChannels + inputChannel) * inputHeight * inputWidth;
+#ifndef __HIP_PLATFORM_HCC__
#pragma unroll
+#endif
for (int kH = 0; kH < KH_LIMIT; ++kH) {
#ifndef __HIP_PLATFORM_HCC__
#pragma unroll
AccT value = ScalarConvert<int, AccT>::to(0);
+#ifndef __HIP_PLATFORM_HCC__
#pragma unroll
+#endif
for (int multiplier = 0; multiplier < depthwiseMultiplier; ++multiplier) {
int och = (c * depthwiseMultiplier) + multiplier;
int weightOffset = och * kernelHeight * kernelWidth;