# NOTE(review): this chunk is an unapplied unified-diff hunk — the leading
# '+'/'-' markers are part of the line content, not CMake syntax. Comments
# below describe the intended post-patch logic.
# Re-export the accumulated CUDA/GPU source lists to the parent directory scope.
set(Caffe2_CUDA_DEPENDENCY_LIBS ${Caffe2_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE)
set(Caffe2_GPU_TEST_SRCS ${Caffe2_GPU_TEST_SRCS} PARENT_SCOPE)
set(Caffe2_GPU_BINARY_SRCS ${Caffe2_GPU_BINARY_SRCS} PARENT_SCOPE)
+
+# HIP source
+# New: propagate the HIP (ROCm) source list to the parent scope as well.
+set(Caffe2_HIP_SRCS ${Caffe2_HIP_SRCS} PARENT_SCOPE)
if(NOT BUILD_ATEN_MOBILE AND BUILD_CAFFE2_OPS)
  # Add source generated by Codegen.cmake and pass to parent
  list(APPEND Caffe2_CPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/aten_op.cc)
# The generated GPU wrapper is renamed aten_op_cuda.cc -> aten_op_gpu.cc,
# presumably so one filename serves both the CUDA build and the hipified
# ROCm copy below — TODO(review): confirm against Codegen.cmake.
-  list(APPEND Caffe2_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/aten_op_cuda.cc)
+  list(APPEND Caffe2_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/aten_op_gpu.cc)
  set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
  set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} PARENT_SCOPE)
+
+  # ROCm builds compile the copy under hip/ (presumably produced by the
+  # hipify step — verify) and pass it up via Caffe2_HIP_SRCS.
+  if(USE_ROCM)
+    list(APPEND Caffe2_HIP_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/hip/aten_op_gpu.cc)
+    set(Caffe2_HIP_SRCS ${Caffe2_HIP_SRCS} PARENT_SCOPE)
+  endif()
endif()
}
// NOTE(review): unapplied diff hunk — '+'/'-' markers are part of the text.
// Returns the non-variable ATen Type for a caffe2 Tensor, selected by the
// wrapper's backend() and the tensor's scalar type (via typeMetaToScalarType).
at::Type& typeFor(const Tensor& ten) {
-  return at::getNonVariableType(backend(), typeMetaToScalarType(ten.meta()));
+  at::Backend b = backend();
+#ifdef __HIP_PLATFORM_HCC__
+  // On the ROCm/HIP build, remap the HIP backend to CUDA before the type
+  // lookup — presumably because hipified kernels register under the CUDA
+  // backend key; TODO(review): confirm.
+  if (b == at::Backend::HIP) {
+    b = at::Backend::CUDA;
+  }
+#endif
+  return at::getNonVariableType(b, typeMetaToScalarType(ten.meta()));
}
at::Tensor tensorWrapping(const Tensor& ten_) {
auto& ten = const_cast<Tensor&>(ten_);
auto at_sizes = src.sizes();
caffe2::TypeMeta type_meta = typeMetaFor(src);
at::Device device = src.device();
+#ifdef __HIP_PLATFORM_HCC__
+ if (device.type() == at::DeviceType::CUDA) {
+ device = at::Device(at::DeviceType::HIP, device.index());
+ }
+#endif
at::TensorImpl* src_impl = src.unsafeReleaseTensorImpl();
std::vector<int64_t> dims(at_sizes.begin(), at_sizes.end());
dst->Resize(dims);
{"Dropout", &Caffe2Backend::CreateDropout},
{"LRN", &Caffe2Backend::CreateLRN},
{"DynamicSlice", &Caffe2Backend::CreateDynamicSlice},
- {"RandomNormal", &Caffe2Backend::CreateRandomNormal}};
+ {"RandomNormal", &Caffe2Backend::CreateRandomNormal},
+ {"Where", &Caffe2Backend::CreateWhereOp}};
return kSpecialOperators;
}
return CommonOnnxNodeToCaffe2Ops(onnx_node, ctx);
}
+// Converts an ONNX "Where" node into a Caffe2 ATen fallback op: the node is
+// cloned, its op_type rewritten to "ATen", and a string attribute
+// operator="where" attached, then the clone is routed through the common
+// ONNX->Caffe2 converter.
+Caffe2Ops Caffe2Backend::CreateWhereOp(
+    OnnxNode* onnx_node,
+    const ConversionContext& ctx) {
+  // The native Caffe2 op doesn't support broadcasting, so we defer the handling
+  // of this op to the ATen library that does.
+  onnx::NodeProto converted;
+  converted.CopyFrom(onnx_node->node);
+  converted.set_op_type("ATen");
+  onnx::AttributeProto* attr = converted.add_attribute();
+  attr->set_name("operator");
+  attr->set_s("where");
+  // Wrap the rewritten proto and hand it to the generic conversion path.
+  OnnxNode new_node(converted);
+  return CommonOnnxNodeToCaffe2Ops(&new_node, ctx);
+}
+
Caffe2Ops Caffe2Backend::CreateReciprocal(
OnnxNode* onnx_node,
const ConversionContext& ctx) {
OnnxNode* onnx_node,
const ConversionContext& ctx);
+ Caffe2Ops CreateWhereOp(OnnxNode* onnx_node, const ConversionContext& ctx);
+
Caffe2Ops CreateBatchNormalization(
OnnxNode* onnx_node,
const ConversionContext& ctx);
'|test_isnan.*' # Needs implementation
'|test_scatter.*' # Should be similar to ScatterAssign
'|test_constantofshape_int.*' # Needs implementation
- '|test_where.*' # Needs implementation
'|test_shrink.*' # Needs implementation
'|test_strnorm.*' # Needs implementation
'|test_nonzero.*' # Needs implementation
"caffe2/video/*",
"caffe2/distributed/*",
"caffe2/queue/*",
+ "caffe2/contrib/aten/*",
"binaries/*",
"caffe2/**/*_test*",
"caffe2/core/*",
return g.op("Relu", input)
+# Export torch.ceil as the ONNX Ceil op (direct 1:1 mapping, no attributes).
+def ceil(g, input):
+    return g.op("Ceil", input)
+
+
+# Export torch.floor as the ONNX Floor op (direct 1:1 mapping, no attributes).
+def floor(g, input):
+    return g.op("Floor", input)
+
+
@parse_args('v', 't', 't')
def threshold(g, self, threshold, value):
# See Note [Export inplace]
# Export torch.where(condition, self, other). Previously emitted the ATen
# fallback op (operator_s="where"); now emits the native ONNX Where op.
def where(g, condition, self, other):
-    return g.op("ATen", condition, self, other, operator_s="where")
+    return g.op("Where", condition, self, other)
@parse_args('v', 'i', 'i')