Enable more caffe2 fp16 rocm tests (#15040)
author bddppq <bai@in.tum.de>
Tue, 11 Dec 2018 05:25:45 +0000 (21:25 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Tue, 11 Dec 2018 05:30:21 +0000 (21:30 -0800)
Summary:
cc rohithkrn petrex
Pull Request resolved: https://github.com/pytorch/pytorch/pull/15040

Reviewed By: houseroad

Differential Revision: D13413068

Pulled By: bddppq

fbshipit-source-id: b2967f16f8da0b9e80083138fb8632c14e9e9b63

caffe2/python/operator_test/elementwise_op_broadcast_test.py
caffe2/python/operator_test/reduction_ops_test.py
caffe2/python/operator_test/rnn_cell_test.py

index 161f5fc..463fcfe 100644 (file)
@@ -5,7 +5,7 @@ from __future__ import unicode_literals
 
 import unittest
 
-from hypothesis import given
+from hypothesis import given, assume
 import numpy as np
 
 from caffe2.proto import caffe2_pb2
@@ -410,9 +410,11 @@ class TestElementwiseBroadcast(serial.SerializedTestCase):
         dc_cpu_only = [d for d in dc if d.device_type != caffe2_pb2.CUDA]
         self.assertDeviceChecks(dc_cpu_only, op, [X, Y], [0])
 
-    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
-    @given(**hu.gcs_gpu_only)
+    @unittest.skipIf(not workspace.has_gpu_support and not workspace.has_hip_support, "No gpu support")
+    @given(**hu.gcs)
     def test_sum_reduce_fp16(self, gc, dc):
+        assume(core.IsGPUDeviceType(gc.device_type))
+
         # Set broadcast and no axis, i.e. broadcasting last dimensions.
         X = np.random.rand(2, 3, 4, 5).astype(np.float16)
         Y = np.random.rand(4, 5).astype(np.float16)
index a138fea..f2aaf8f 100644 (file)
@@ -4,7 +4,7 @@ from __future__ import print_function
 from __future__ import unicode_literals
 
 from caffe2.proto import caffe2_pb2
-from caffe2.python import core
+from caffe2.python import core, workspace
 from hypothesis import assume, given
 import caffe2.python.hypothesis_test_util as hu
 import caffe2.python.serialized_test.serialized_test_util as serial
@@ -67,9 +67,9 @@ class TestReductionOps(serial.SerializedTestCase):
            **hu.gcs)
     def test_elementwise_sqrsum(self, n, dtype, gc, dc):
         if dtype == np.float16:
-            # fp16 is only supported with CUDA
-            assume(gc.device_type == caffe2_pb2.CUDA)
-            dc = [d for d in dc if d.device_type == caffe2_pb2.CUDA]
+            # fp16 is only supported with CUDA/HIP
+            assume(gc.device_type == workspace.GpuDeviceType)
+            dc = [d for d in dc if d.device_type == workspace.GpuDeviceType]
 
         X = np.random.rand(n).astype(dtype)
 
index 66ac07d..744dc60 100644 (file)
@@ -1565,9 +1565,9 @@ class RNNCellTest(hu.HypothesisTestCase):
             self, seed, n, d, t, dtype, dc, use_sequence_lengths, gc):
         np.random.seed(seed)
         if dtype == np.float16:
-            # only supported with CUDA
-            assume(gc.device_type == caffe2_pb2.CUDA)
-            dc = [do for do in dc if do.device_type == caffe2_pb2.CUDA]
+            # only supported with CUDA/HIP
+            assume(gc.device_type == workspace.GpuDeviceType)
+            dc = [do for do in dc if do.device_type == workspace.GpuDeviceType]
 
         if use_sequence_lengths:
             op_inputs = ['hidden_t_prev', 'cell_t_prev', 'gates_t',