Windows shared build (#13550)
authorArutyunovG <arutyunovg@yandex.ru>
Fri, 16 Nov 2018 20:06:21 +0000 (12:06 -0800)
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>
Fri, 16 Nov 2018 20:16:28 +0000 (12:16 -0800)
Summary:
Hi guys,

I'd like to build Caffe2 with more supported options on Windows with Microsoft Visual Studio.
This is the first pull request.
Running scripts/build_windows_shared.bat builds Caffe2 with both CMAKE_BUILD_TYPE=Debug and CMAKE_BUILD_TYPE=Release using the Visual Studio 14 2015 generator.
CUDA is 9.0 and cuDNN is 7.0.5; glog, gflags, and LMDB are supported on my system.
Python is 3.5, and Detectron works from the Python interface as well.
It was even possible to debug Detectron code and step into caffe2_gpu.dll with the PDBs built.

Unfortunately, the c10/experimental ops don't build with this Visual Studio generator, so I added a dedicated option, BUILD_C10_EXPERIMENTAL_OPS (default ON), to deal with it in build_windows_shared.bat.

After this pull request, the next step is to add Visual Studio 2017 support to the script.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/13550

Reviewed By: ezyang

Differential Revision: D13042597

Pulled By: orionr

fbshipit-source-id: f313f909f599cd582a1d000eff766eef3a9fc4fc

73 files changed:
CMakeLists.txt
aten/src/ATen/core/TensorTypeId.cpp [deleted file]
aten/src/ATen/core/TensorTypeId.h
aten/src/ATen/core/TensorTypeIdRegistration.h
aten/src/THC/THCAllocator.cpp
aten/src/THC/THCAllocator.h
binaries/benchmark_helper.cc
binaries/convert_image_to_tensor.cc
binaries/speed_benchmark.cc
c10/util/IdWrapper.h
c10/util/SmallVector.h
c10/util/StringUtil.h
c10/util/TensorTypeId.cpp [new file with mode: 0644]
c10/util/TensorTypeId.h [new file with mode: 0644]
c10/util/TensorTypeIdRegistration.cpp [moved from aten/src/ATen/core/TensorTypeIdRegistration.cpp with 57% similarity]
c10/util/TensorTypeIdRegistration.h [new file with mode: 0644]
c10/util/string_utils.h [new file with mode: 0644]
caffe2/CMakeLists.txt
caffe2/contrib/aten/aten_op_template.h
caffe2/contrib/prof/htrace_async_dag_net_gpu.cc
caffe2/contrib/prof/htrace_dag_net.cc
caffe2/contrib/script/compiler.cc
caffe2/contrib/script/lexer.cc
caffe2/contrib/script/lexer.h
caffe2/core/common.h
caffe2/core/common_test.cc
caffe2/core/memonger.cc
caffe2/core/net_async_base.cc
caffe2/core/net_async_dag_gpu.cc
caffe2/core/net_async_tracing.cc
caffe2/core/numa.cc
caffe2/image/image_input_op.cc
caffe2/image/image_input_op.h
caffe2/image/image_input_op_gpu.cc
caffe2/mobile/contrib/arm-compute/test/gl_concat_op_test.cc
caffe2/mobile/contrib/opengl/operators/GLConcat.cc
caffe2/mobile/contrib/opengl/operators/GLConvolution.h
caffe2/mobile/contrib/opengl/operators/GLInstanceNorm.cc
caffe2/mobile/contrib/opengl/operators/GLPRelu.cc
caffe2/mobile/contrib/opengl/operators/GLPool.cc
caffe2/mobile/contrib/opengl/operators/GLSigmoid.cc
caffe2/mobile/contrib/opengl/operators/GLSoftmax.cc
caffe2/mobile/contrib/opengl/operators/GLStylizer.cc
caffe2/mobile/contrib/opengl/test/opengl_test.cc
caffe2/observers/runcnt_observer.cc
caffe2/onnx/backend.cc
caffe2/operators/CMakeLists.txt
caffe2/operators/bbox_transform_op.h
caffe2/operators/collect_and_distribute_fpn_rpn_proposals_op.h
caffe2/operators/do_op.h
caffe2/operators/h_softmax_op.cc
caffe2/operators/onnx_while_op.h
caffe2/operators/rnn/recurrent_network_blob_fetcher_op.h
caffe2/operators/rnn/recurrent_network_executor.h
caffe2/operators/segment_reduction_op.cc
caffe2/opt/backend_cutting.cc
caffe2/opt/backend_cutting_test.cc
caffe2/opt/converter_nomigraph_test.cc
caffe2/opt/device_test.cc
caffe2/opt/mobile_test.cc
caffe2/predictor/emulator/data_filler.cc
caffe2/predictor/emulator/std_output_formatter.h
caffe2/queue/queue_ops.h
caffe2/serialize/inline_container.h
caffe2/share/contrib/zstd/quant_decomp_zstd_op.cc
caffe2/transforms/pattern_net_transform.h
caffe2/utils/fatal_signal_asan_no_sig_test.cc
cmake/Dependencies.cmake
modules/CMakeLists.txt
modules/observers/net_observer_reporter_print.cc
modules/observers/perf_observer.cc
torch/csrc/jit/export.cpp
torch/csrc/jit/import.cpp

index 90e41b5..484e89b 100644 (file)
@@ -65,6 +65,7 @@ option(BUILD_DOCS "Build Caffe2 documentation" OFF)
 option(BUILD_CUSTOM_PROTOBUF "Build and use Caffe2's own protobuf under third_party" ON)
 option(BUILD_PYTHON "Build Python binaries" ON)
 option(BUILD_CAFFE2_OPS "Build Caffe2 operators" ON)
+option(BUILD_C10_EXPERIMENTAL_OPS "Build c10 experimental operators" ON)
 option(BUILD_SHARED_LIBS "Build libcaffe2.so" ON)
 cmake_dependent_option(
     CAFFE2_LINK_LOCAL_PROTOBUF "If set, build protobuf inside libcaffe2.so." ON
diff --git a/aten/src/ATen/core/TensorTypeId.cpp b/aten/src/ATen/core/TensorTypeId.cpp
deleted file mode 100644 (file)
index a825017..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "ATen/core/TensorTypeId.h"
-#include "caffe2/utils/string_utils.h"
-
-namespace at {
-
-std::ostream& operator<<(std::ostream& str, at::TensorTypeId rhs) {
-  return str << caffe2::to_string(rhs.underlyingId());
-}
-
-} // namespace at
index ab73481..0ea08cd 100644 (file)
@@ -1,40 +1,2 @@
 #pragma once
-
-#include <iostream>
-#include <string>
-#include "c10/util/IdWrapper.h"
-#include "c10/macros/Macros.h"
-
-namespace at {
-
-namespace details {
-using _tensorTypeId_underlyingType = uint8_t;
-}
-
-/**
- * Dynamic type ID of a Tensor argument.  It represents something like
- * CPUTensor, etc.
- */
-class CAFFE2_API TensorTypeId final
-    : public at::
-          IdWrapper<TensorTypeId, details::_tensorTypeId_underlyingType> {
- public:
-  // Don't use this!
-  // Unfortunately, a default constructor needs to be defined because of
-  // https://reviews.llvm.org/D41223
-  constexpr TensorTypeId() noexcept : IdWrapper(0) {}
-
- private:
-  constexpr explicit TensorTypeId(
-      details::_tensorTypeId_underlyingType id) noexcept
-      : IdWrapper(id) {}
-
-  friend class TensorTypeIdCreator;
-  friend CAFFE2_API std::ostream& operator<<(std::ostream&, TensorTypeId);
-};
-
-CAFFE2_API std::ostream& operator<<(std::ostream&, at::TensorTypeId);
-
-} // namespace at
-
-C10_DEFINE_HASH_FOR_IDWRAPPER(at::TensorTypeId)
+#include <c10/util/TensorTypeId.h>
index a4dd44d..024ef69 100644 (file)
@@ -1,109 +1,2 @@
 #pragma once
-
-/**
- * To register your own tensor types, do in a header file:
- *   AT_DECLARE_TENSOR_TYPE(MY_TENSOR)
- * and in one (!) cpp file:
- *   AT_DEFINE_TENSOR_TYPE(MY_TENSOR)
- * Both must be in the same namespace.
- */
-
-#include "ATen/core/TensorTypeId.h"
-#include "c10/macros/Macros.h"
-
-#include <atomic>
-#include <mutex>
-#include <unordered_set>
-
-namespace at {
-
-class CAFFE2_API TensorTypeIdCreator final {
- public:
-  TensorTypeIdCreator();
-
-  at::TensorTypeId create();
-
-  static constexpr at::TensorTypeId undefined() noexcept {
-    return TensorTypeId(0);
-  }
-
- private:
-  std::atomic<details::_tensorTypeId_underlyingType> last_id_;
-
-  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdCreator);
-};
-
-class CAFFE2_API TensorTypeIdRegistry final {
- public:
-  TensorTypeIdRegistry();
-
-  void registerId(at::TensorTypeId id);
-  void deregisterId(at::TensorTypeId id);
-
- private:
-  std::unordered_set<at::TensorTypeId> registeredTypeIds_;
-  std::mutex mutex_;
-
-  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistry);
-};
-
-class CAFFE2_API TensorTypeIds final {
- public:
-  static TensorTypeIds& singleton();
-
-  at::TensorTypeId createAndRegister();
-  void deregister(at::TensorTypeId id);
-
-  static constexpr at::TensorTypeId undefined() noexcept;
-
- private:
-  TensorTypeIds();
-
-  TensorTypeIdCreator creator_;
-  TensorTypeIdRegistry registry_;
-
-  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIds);
-};
-
-inline constexpr at::TensorTypeId TensorTypeIds::undefined() noexcept {
-  return TensorTypeIdCreator::undefined();
-}
-
-class CAFFE2_API TensorTypeIdRegistrar final {
- public:
-  TensorTypeIdRegistrar();
-  ~TensorTypeIdRegistrar();
-
-  at::TensorTypeId id() const noexcept;
-
- private:
-  at::TensorTypeId id_;
-
-  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistrar);
-};
-
-inline at::TensorTypeId TensorTypeIdRegistrar::id() const noexcept {
-  return id_;
-}
-
-#define AT_DECLARE_TENSOR_TYPE(TensorName) \
-  CAFFE2_API at::TensorTypeId TensorName()
-
-#define AT_DEFINE_TENSOR_TYPE(TensorName)           \
-  at::TensorTypeId TensorName() {                   \
-    static TensorTypeIdRegistrar registration_raii; \
-    return registration_raii.id();                  \
-  }
-
-AT_DECLARE_TENSOR_TYPE(UndefinedTensorId);
-AT_DECLARE_TENSOR_TYPE(CPUTensorId); // PyTorch/Caffe2 supported
-AT_DECLARE_TENSOR_TYPE(CUDATensorId); // PyTorch/Caffe2 supported
-AT_DECLARE_TENSOR_TYPE(SparseCPUTensorId); // PyTorch only
-AT_DECLARE_TENSOR_TYPE(SparseCUDATensorId); // PyTorch only
-AT_DECLARE_TENSOR_TYPE(MKLDNNTensorId); // Caffe2 only
-AT_DECLARE_TENSOR_TYPE(OpenGLTensorId); // Caffe2 only
-AT_DECLARE_TENSOR_TYPE(OpenCLTensorId); // Caffe2 only
-AT_DECLARE_TENSOR_TYPE(IDEEPTensorId); // Caffe2 only
-AT_DECLARE_TENSOR_TYPE(HIPTensorId); // Caffe2 only
-
-} // namespace at
+#include "c10/util/TensorTypeIdRegistration.h"
index a39d378..78b650a 100644 (file)
@@ -19,3 +19,6 @@ at::DataPtr THCIpcDeleter::makeDataPtr(void* data, int device) {
   auto* context = new THCIpcDeleter(data, device);
   return {data, context, &deleteTHCIpcDeleter, at::Device(at::DeviceType::CUDA, cur_device)};
 }
+
+THCIpcDeleter::THCIpcDeleter(void* data, int device)
+    : data_(data), device_(device) {}
index afe66b6..5ff8de1 100644 (file)
@@ -8,7 +8,7 @@
 #ifdef __cplusplus
 class CAFFE2_API THCIpcDeleter {
  public:
-  THCIpcDeleter(void* data, int device) : data_(data), device_(device) {};
+  THCIpcDeleter(void* data, int device);
   ~THCIpcDeleter();
   static at::DataPtr makeDataPtr(void* data, int device);
 private:
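
The two hunks above move the THCIpcDeleter constructor definition out of the header and into THCAllocator.cpp, a common pattern when a class is exported from a shared library. A minimal sketch of the pattern, with hypothetical names (not part of this diff):

  // exported.h -- imagine the class carries an export macro such as CAFFE2_API
  class MyExportedType {
   public:
    MyExportedType(void* data, int device);  // declaration only
   private:
    void* data_;
    int device_;
  };

  // exported.cpp -- the single out-of-line definition
  MyExportedType::MyExportedType(void* data, int device)
      : data_(data), device_(device) {}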
index f5be44d..1020dc1 100644 (file)
@@ -141,7 +141,7 @@ void loadInput(
         vector<string> input_dims_str = caffe2::split(',', input_dims_list[i]);
         vector<int> input_dims;
         for (const string& s : input_dims_str) {
-          input_dims.push_back(caffe2::stoi(s));
+          input_dims.push_back(c10::stoi(s));
         }
         caffe2::Blob* blob = workspace->GetBlob(input_names[i]);
         if (blob == nullptr) {
index fa6b298..31f1eda 100644 (file)
@@ -99,9 +99,9 @@ std::vector<float> convertToVector(cv::Mat& img) {
     } else if (step == "normalize") {
       normalize = {255, 255, 255};
     } else if (step == "mean") {
-      mean = {0.406, 0.456, 0.485};
+      mean = {0.406f, 0.456f, 0.485f};
     } else if (step == "std") {
-      std = {0.225, 0.224, 0.229};
+      std = {0.225f, 0.224f, 0.229f};
     } else if (step == "bgrtorgb") {
       bgrtorgb = true;
     } else {
@@ -143,9 +143,14 @@ std::vector<float> convertOneImage(std::string& filename) {
   assert(filename[0] != '~');
 
   std::cout << "Converting " << filename << std::endl;
+
   // Load image
   cv::Mat img = cv::imread(
+#if CV_MAJOR_VERSION <= 3
       filename, FLAGS_color ? CV_LOAD_IMAGE_COLOR : CV_LOAD_IMAGE_GRAYSCALE);
+#else
+      filename, FLAGS_color ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE);
+#endif
 
   cv::Mat crop = cropToSquare(img);
 
index 8915154..00f93f4 100644 (file)
@@ -127,7 +127,7 @@ int main(int argc, char** argv) {
         vector<string> input_dims_str = caffe2::split(',', input_dims_list[i]);
         vector<int> input_dims;
         for (const string& s : input_dims_str) {
-          input_dims.push_back(caffe2::stoi(s));
+          input_dims.push_back(c10::stoi(s));
         }
         caffe2::Blob* blob = workspace->GetBlob(input_names[i]);
         if (blob == nullptr) {
index d840502..fe32084 100644 (file)
@@ -23,7 +23,7 @@ namespace c10 {
  * for you, given the underlying type supports it.
  */
 template <class ConcreteType, class UnderlyingType>
-class CAFFE2_API IdWrapper {
+class C10_API IdWrapper {
  public:
   using underlying_type = UnderlyingType;
   using concrete_type = ConcreteType;
index 4d8de18..e44377b 100644 (file)
@@ -53,7 +53,7 @@ static inline uint64_t NextPowerOf2(uint64_t A) {
 } // namespace detail
 
 /// This is all the non-templated stuff common to all SmallVectors.
-class CAFFE2_API SmallVectorBase {
+class C10_API SmallVectorBase {
  protected:
   void *BeginX, *EndX, *CapacityX;
 
index 0f9d5ab..bbd16f8 100644 (file)
@@ -2,6 +2,7 @@
 #define C10_UTIL_STRINGUTIL_H_
 
 #include <c10/macros/Macros.h>
+#include <c10/util/string_utils.h>
 
 #include <cstddef>
 #include <ostream>
@@ -73,28 +74,6 @@ struct C10_API SourceLocation {
 
 std::ostream& operator<<(std::ostream& out, const SourceLocation& loc);
 
-/// Portable implementation of std::stoi, which works for Android builds.
-///
-/// TODO: You won't be able to call this unqualified, because ADL means that it
-/// will be ambiguous with std::stoi.  Maybe we should fix this by giving
-/// our version a different name.
-inline int stoi(const std::string& str) {
-#if defined(__ANDROID__)
-  std::stringstream ss;
-  int n = 0;
-  ss << str;
-  ss >> n;
-  return n;
-#else
-  return std::stoi(str);
-#endif // defined(__ANDROID__)
-}
-
 } // namespace c10
 
-// TODO: Remove me when namespace unification occurs
-namespace at {
-using c10::stoi;
-}
-
 #endif // C10_UTIL_STRINGUTIL_H_
diff --git a/c10/util/TensorTypeId.cpp b/c10/util/TensorTypeId.cpp
new file mode 100644 (file)
index 0000000..c51c31e
--- /dev/null
@@ -0,0 +1,10 @@
+#include "c10/util/TensorTypeId.h"
+#include "c10/util/string_utils.h"
+
+namespace c10 {
+
+std::ostream& operator<<(std::ostream& str, c10::TensorTypeId rhs) {
+  return str << c10::to_string(rhs.underlyingId());
+}
+
+} // namespace c10
diff --git a/c10/util/TensorTypeId.h b/c10/util/TensorTypeId.h
new file mode 100644 (file)
index 0000000..6f6c2ad
--- /dev/null
@@ -0,0 +1,43 @@
+#ifndef TENSOR_TYPE_ID_H_
+#define TENSOR_TYPE_ID_H_
+
+#include <iostream>
+#include <string>
+#include "c10/macros/Macros.h"
+#include "c10/util/IdWrapper.h"
+
+namespace c10 {
+
+namespace details {
+using _tensorTypeId_underlyingType = uint8_t;
+}
+
+/**
+ * Dynamic type ID of a Tensor argument.  It represents something like
+ * CPUTensor, etc.
+ */
+class C10_API TensorTypeId final
+    : public at::
+          IdWrapper<TensorTypeId, details::_tensorTypeId_underlyingType> {
+ public:
+  // Don't use this!
+  // Unfortunately, a default constructor needs to be defined because of
+  // https://reviews.llvm.org/D41223
+  constexpr TensorTypeId() noexcept : IdWrapper(0) {}
+
+ private:
+  constexpr explicit TensorTypeId(
+      details::_tensorTypeId_underlyingType id) noexcept
+      : IdWrapper(id) {}
+
+  friend class TensorTypeIdCreator;
+  friend C10_API std::ostream& operator<<(std::ostream&, TensorTypeId);
+};
+
+C10_API std::ostream& operator<<(std::ostream&, c10::TensorTypeId);
+
+} // namespace c10
+
+C10_DEFINE_HASH_FOR_IDWRAPPER(c10::TensorTypeId)
+
+#endif // TENSOR_TYPE_ID_H_
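
A small usage sketch (not part of the diff): C10_DEFINE_HASH_FOR_IDWRAPPER(c10::TensorTypeId) generates a std::hash specialization through IdWrapper, so type ids can live in standard hash containers, and the operator<< declared above prints the underlying integer id.

  #include <iostream>
  #include <unordered_set>
  #include "c10/util/TensorTypeId.h"

  void track(c10::TensorTypeId id) {
    static std::unordered_set<c10::TensorTypeId> seen;  // works via the generated std::hash
    if (seen.insert(id).second) {
      std::cout << "new type id: " << id << std::endl;  // operator<< from this header
    }
  }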
similarity index 57%
rename from aten/src/ATen/core/TensorTypeIdRegistration.cpp
rename to c10/util/TensorTypeIdRegistration.cpp
index 5f88916..9b45254 100644 (file)
@@ -1,8 +1,8 @@
-#include <ATen/core/TensorTypeIdRegistration.h>
+#include <c10/util/TensorTypeIdRegistration.h>
 #include <c10/util/C++17.h>
 #include <c10/util/Exception.h>
 
-namespace at {
+namespace c10 {
 
 TensorTypeIds::TensorTypeIds() : creator_(), registry_() {}
 
@@ -13,8 +13,7 @@ TensorTypeIds& TensorTypeIds::singleton() {
 
 TensorTypeIdCreator::TensorTypeIdCreator() : last_id_(0) {}
 
-at::TensorTypeId TensorTypeIdCreator::create() {
-
+c10::TensorTypeId TensorTypeIdCreator::create() {
   auto id = TensorTypeId(++last_id_);
 
   if (last_id_ == 0) { // overflow happened!
@@ -31,23 +30,23 @@ at::TensorTypeId TensorTypeIdCreator::create() {
 
 TensorTypeIdRegistry::TensorTypeIdRegistry() : registeredTypeIds_(), mutex_() {}
 
-void TensorTypeIdRegistry::registerId(at::TensorTypeId id) {
+void TensorTypeIdRegistry::registerId(c10::TensorTypeId id) {
   std::lock_guard<std::mutex> lock(mutex_);
   registeredTypeIds_.emplace(id);
 }
 
-void TensorTypeIdRegistry::deregisterId(at::TensorTypeId id) {
+void TensorTypeIdRegistry::deregisterId(c10::TensorTypeId id) {
   std::lock_guard<std::mutex> lock(mutex_);
   registeredTypeIds_.erase(id);
 }
 
-at::TensorTypeId TensorTypeIds::createAndRegister() {
-  at::TensorTypeId id = creator_.create();
+c10::TensorTypeId TensorTypeIds::createAndRegister() {
+  c10::TensorTypeId id = creator_.create();
   registry_.registerId(id);
   return id;
 }
 
-void TensorTypeIds::deregister(at::TensorTypeId id) {
+void TensorTypeIds::deregister(c10::TensorTypeId id) {
   registry_.deregisterId(id);
 }
 
@@ -58,15 +57,15 @@ TensorTypeIdRegistrar::~TensorTypeIdRegistrar() {
   TensorTypeIds::singleton().deregister(id_);
 }
 
-AT_DEFINE_TENSOR_TYPE(UndefinedTensorId);
-AT_DEFINE_TENSOR_TYPE(CPUTensorId);
-AT_DEFINE_TENSOR_TYPE(CUDATensorId);
-AT_DEFINE_TENSOR_TYPE(SparseCPUTensorId);
-AT_DEFINE_TENSOR_TYPE(SparseCUDATensorId);
-AT_DEFINE_TENSOR_TYPE(MKLDNNTensorId); // Caffe2 only
-AT_DEFINE_TENSOR_TYPE(OpenGLTensorId); // Caffe2 only
-AT_DEFINE_TENSOR_TYPE(OpenCLTensorId); // Caffe2 only
-AT_DEFINE_TENSOR_TYPE(IDEEPTensorId); // Caffe2 only
-AT_DEFINE_TENSOR_TYPE(HIPTensorId); // Caffe2 only
-
-} // namespace at
+C10_DEFINE_TENSOR_TYPE(UndefinedTensorId);
+C10_DEFINE_TENSOR_TYPE(CPUTensorId);
+C10_DEFINE_TENSOR_TYPE(CUDATensorId);
+C10_DEFINE_TENSOR_TYPE(SparseCPUTensorId);
+C10_DEFINE_TENSOR_TYPE(SparseCUDATensorId);
+C10_DEFINE_TENSOR_TYPE(MKLDNNTensorId); // Caffe2 only
+C10_DEFINE_TENSOR_TYPE(OpenGLTensorId); // Caffe2 only
+C10_DEFINE_TENSOR_TYPE(OpenCLTensorId); // Caffe2 only
+C10_DEFINE_TENSOR_TYPE(IDEEPTensorId); // Caffe2 only
+C10_DEFINE_TENSOR_TYPE(HIPTensorId); // Caffe2 only
+
+} // namespace c10
diff --git a/c10/util/TensorTypeIdRegistration.h b/c10/util/TensorTypeIdRegistration.h
new file mode 100644 (file)
index 0000000..a926fdf
--- /dev/null
@@ -0,0 +1,112 @@
+#ifndef TENSOR_TYPE_ID_REGISTRATION_H_
+#define TENSOR_TYPE_ID_REGISTRATION_H_
+
+/**
+ * To register your own tensor types, do in a header file:
+ *   C10_DECLARE_TENSOR_TYPE(MY_TENSOR)
+ * and in one (!) cpp file:
+ *   C10_DEFINE_TENSOR_TYPE(MY_TENSOR)
+ * Both must be in the same namespace.
+ */
+
+#include "c10/macros/Macros.h"
+#include "c10/util/TensorTypeId.h"
+
+#include <atomic>
+#include <mutex>
+#include <unordered_set>
+
+namespace c10 {
+
+class C10_API TensorTypeIdCreator final {
+ public:
+  TensorTypeIdCreator();
+
+  c10::TensorTypeId create();
+
+  static constexpr c10::TensorTypeId undefined() noexcept {
+    return c10::TensorTypeId(0);
+  }
+
+ private:
+  std::atomic<details::_tensorTypeId_underlyingType> last_id_;
+
+  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdCreator);
+};
+
+class C10_API TensorTypeIdRegistry final {
+ public:
+  TensorTypeIdRegistry();
+
+  void registerId(c10::TensorTypeId id);
+  void deregisterId(c10::TensorTypeId id);
+
+ private:
+  std::unordered_set<c10::TensorTypeId> registeredTypeIds_;
+  std::mutex mutex_;
+
+  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistry);
+};
+
+class C10_API TensorTypeIds final {
+ public:
+  static TensorTypeIds& singleton();
+
+  c10::TensorTypeId createAndRegister();
+  void deregister(c10::TensorTypeId id);
+
+  static constexpr c10::TensorTypeId undefined() noexcept;
+
+ private:
+  TensorTypeIds();
+
+  TensorTypeIdCreator creator_;
+  TensorTypeIdRegistry registry_;
+
+  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIds);
+};
+
+inline constexpr c10::TensorTypeId TensorTypeIds::undefined() noexcept {
+  return TensorTypeIdCreator::undefined();
+}
+
+class C10_API TensorTypeIdRegistrar final {
+ public:
+  TensorTypeIdRegistrar();
+  ~TensorTypeIdRegistrar();
+
+  c10::TensorTypeId id() const noexcept;
+
+ private:
+  c10::TensorTypeId id_;
+
+  C10_DISABLE_COPY_AND_ASSIGN(TensorTypeIdRegistrar);
+};
+
+inline c10::TensorTypeId TensorTypeIdRegistrar::id() const noexcept {
+  return id_;
+}
+
+#define C10_DECLARE_TENSOR_TYPE(TensorName) \
+  C10_API c10::TensorTypeId TensorName()
+
+#define C10_DEFINE_TENSOR_TYPE(TensorName)          \
+  c10::TensorTypeId TensorName() {                  \
+    static TensorTypeIdRegistrar registration_raii; \
+    return registration_raii.id();                  \
+  }
+
+C10_DECLARE_TENSOR_TYPE(UndefinedTensorId);
+C10_DECLARE_TENSOR_TYPE(CPUTensorId); // PyTorch/Caffe2 supported
+C10_DECLARE_TENSOR_TYPE(CUDATensorId); // PyTorch/Caffe2 supported
+C10_DECLARE_TENSOR_TYPE(SparseCPUTensorId); // PyTorch only
+C10_DECLARE_TENSOR_TYPE(SparseCUDATensorId); // PyTorch only
+C10_DECLARE_TENSOR_TYPE(MKLDNNTensorId); // Caffe2 only
+C10_DECLARE_TENSOR_TYPE(OpenGLTensorId); // Caffe2 only
+C10_DECLARE_TENSOR_TYPE(OpenCLTensorId); // Caffe2 only
+C10_DECLARE_TENSOR_TYPE(IDEEPTensorId); // Caffe2 only
+C10_DECLARE_TENSOR_TYPE(HIPTensorId); // Caffe2 only
+
+} // namespace c10
+
+#endif // TENSOR_TYPE_ID_REGISTRATION_H_
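
Following the header's own doc comment, registering a custom tensor type after this move looks like the sketch below (MyTensorId is a hypothetical example name):

  // my_tensor.h -- declare once, in a header
  C10_DECLARE_TENSOR_TYPE(MyTensorId);

  // my_tensor.cc -- define in exactly one cpp file, in the same namespace
  C10_DEFINE_TENSOR_TYPE(MyTensorId);

  // The first call lazily creates and registers the id; later calls return the same id.
  c10::TensorTypeId id = MyTensorId();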
diff --git a/c10/util/string_utils.h b/c10/util/string_utils.h
new file mode 100644 (file)
index 0000000..df4d271
--- /dev/null
@@ -0,0 +1,60 @@
+#pragma once
+
+#include <sstream>
+#include <string>
+
+using std::string;
+
+namespace c10 {
+
+// to_string, stoi and stod implementation for Android related stuff.
+// Note(jiayq): Do not use the CAFFE2_TESTONLY_FORCE_STD_STRING_TEST macro
+// outside testing code that lives under common_test.cc
+#if defined(__ANDROID__) || defined(CAFFE2_TESTONLY_FORCE_STD_STRING_TEST)
+#define CAFFE2_TESTONLY_WE_ARE_USING_CUSTOM_STRING_FUNCTIONS 1
+template <typename T>
+std::string to_string(T value) {
+  std::ostringstream os;
+  os << value;
+  return os.str();
+}
+
+inline int stoi(const string& str) {
+  std::stringstream ss;
+  int n = 0;
+  ss << str;
+  ss >> n;
+  return n;
+}
+
+inline uint64_t stoull(const string& str) {
+  std::stringstream ss;
+  uint64_t n = 0;
+  ss << str;
+  ss >> n;
+  return n;
+}
+
+inline double stod(const string& str, std::size_t* pos = 0) {
+  std::stringstream ss;
+  ss << str;
+  double val = 0;
+  ss >> val;
+  if (pos) {
+    if (ss.tellg() == std::streampos(-1)) {
+      *pos = str.size();
+    } else {
+      *pos = ss.tellg();
+    }
+  }
+  return val;
+}
+#else
+#define CAFFE2_TESTONLY_WE_ARE_USING_CUSTOM_STRING_FUNCTIONS 0
+using std::stod;
+using std::stoi;
+using std::stoull;
+using std::to_string;
+#endif // defined(__ANDROID__) || defined(CAFFE2_FORCE_STD_STRING_FALLBACK_TEST)
+
+} // namespace c10
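
A quick usage sketch of the relocated helpers (not part of the diff; the values shown assume the standard-library branch, and the Android stringstream fallback gives the same results for these inputs):

  #include <iostream>
  #include "c10/util/string_utils.h"

  int main() {
    int n = c10::stoi("1234");                 // 1234
    std::size_t pos = 0;
    double d = c10::stod("1.5 apples", &pos);  // d == 1.5, pos == 3
    std::cout << c10::to_string(n) << " " << d << " " << pos << std::endl;
  }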
index 802ef2f..f8bc85a 100644 (file)
@@ -524,6 +524,7 @@ if (BUILD_PYTHON)
       caffe2_pybind11_state caffe2_library)
   if (WIN32)
     target_link_libraries(caffe2_pybind11_state ${PYTHON_LIBRARIES})
+    target_link_libraries(caffe2_pybind11_state onnx_proto)
   endif(WIN32)
 
   # Install caffe2_pybind11_state(_gpu|hip) in site-packages/caffe2/python,
@@ -548,6 +549,7 @@ if (BUILD_PYTHON)
         caffe2_pybind11_state_gpu caffe2_library caffe2_gpu_library)
     if (WIN32)
       target_link_libraries(caffe2_pybind11_state_gpu ${PYTHON_LIBRARIES})
+      target_link_libraries(caffe2_pybind11_state_gpu onnx_proto)
     endif(WIN32)
 
     # Install with same rpath as non-gpu caffe2_pybind11_state
index b6d3126..8b9c69c 100644 (file)
@@ -167,7 +167,7 @@ private:
       descriptor << "-" << a;
 
     std::string descriptor_sized =
-        descriptor.str() + "-" + caffe2::to_string(InputSize());
+        descriptor.str() + "-" + c10::to_string(InputSize());
     std::string descriptor_var_args = descriptor.str() + "-*";
     if (op_to_key.count(descriptor_sized) > 0) {
       return op_to_key[descriptor_sized];
index fe67940..97da270 100644 (file)
@@ -39,7 +39,7 @@ class HTraceAsyncDAGNet : public AsyncDAGNet {
     htrace::Scope run_scope(
         htrace_tracer_,
         htrace_root_scope_.GetSpanId(),
-        "run-scope-" + caffe2::to_string(run_count_++));
+        "run-scope-" + c10::to_string(run_count_++));
     return AsyncDAGNet::DoRunAsync();
   }
 
index a802cfc..a394cc5 100644 (file)
@@ -43,7 +43,7 @@ class HTraceDAGNet : public DAGNetBase {
     htrace::Scope run_scope(
         htrace_tracer_,
         htrace_root_scope_.GetSpanId(),
-        "run-scope-" + caffe2::to_string(run_count_++));
+        "run-scope-" + c10::to_string(run_count_++));
     return DAGNetBase::DoRunAsync();
   }
 
@@ -64,8 +64,7 @@ class HTraceDAGNet : public DAGNetBase {
       htrace::Scope operator_scope(
           htrace_tracer_,
           worker_scope->GetSpanId(),
-          "#" + caffe2::to_string(idx) + " (" + print_name + ", " + op_type +
-              ")");
+          "#" + c10::to_string(idx) + " (" + print_name + ", " + op_type + ")");
       success &= operator_nodes_[idx].operator_->Run();
     }
     return success;
index fc7c183..16a7657 100644 (file)
@@ -216,7 +216,7 @@ struct DefCompiler {
     }
   }
   std::string fresh(std::string prefix = "$t") {
-    return std::string(prefix) + caffe2::to_string(next_fresh++);
+    return std::string(prefix) + c10::to_string(next_fresh++);
   }
   const char* operatorName(int kind, int ninputs) {
     switch (kind) {
@@ -252,7 +252,7 @@ struct DefCompiler {
       case TK_NOT:
         return "Not";
       default:
-        throw std::runtime_error("unknown kind " + caffe2::to_string(kind));
+        throw std::runtime_error("unknown kind " + c10::to_string(kind));
     }
   }
   void fillArg(Argument* arg, const Attribute& attr) {
@@ -598,7 +598,7 @@ struct DefCompiler {
         return TensorProto_DataType_BOOL;
       default:
         throw std::runtime_error(
-            "expected type token: " + caffe2::to_string(type));
+            "expected type token: " + c10::to_string(type));
     }
   }
 
index 2f788e3..9dafea9 100644 (file)
@@ -14,7 +14,7 @@ std::string kindToString(int kind) {
     TC_FORALL_TOKEN_KINDS(DEFINE_CASE)
 #undef DEFINE_CASE
     default:
-      throw std::runtime_error("unknown kind: " + caffe2::to_string(kind));
+      throw std::runtime_error("unknown kind: " + c10::to_string(kind));
   }
 }
 
index ddcc672..b298809 100644 (file)
@@ -358,7 +358,7 @@ struct Token {
   double doubleValue() {
     assert(TK_NUMBER == kind);
     size_t idx;
-    double r = ::caffe2::stod(text(), &idx);
+    double r = ::c10::stod(text(), &idx);
     assert(idx == range.size());
     return r;
   }
index fb89414..132f7ba 100644 (file)
@@ -29,6 +29,8 @@
 
 #include "c10/macros/Macros.h"
 
+#include "c10/util/string_utils.h"
+
 namespace caffe2 {
 
 // Note(Yangqing): NVCC does not play well with unordered_map on some platforms,
@@ -125,57 +127,6 @@ make_unique(Args&&...) = delete;
 
 #endif
 
-// to_string, stoi and stod implementation for Android related stuff.
-// Note(jiayq): Do not use the CAFFE2_TESTONLY_FORCE_STD_STRING_TEST macro
-// outside testing code that lives under common_test.cc
-#if defined(__ANDROID__) || defined(CAFFE2_TESTONLY_FORCE_STD_STRING_TEST)
-#define CAFFE2_TESTONLY_WE_ARE_USING_CUSTOM_STRING_FUNCTIONS 1
-template <typename T>
-std::string to_string(T value)
-{
-  std::ostringstream os;
-  os << value;
-  return os.str();
-}
-
-inline int stoi(const string& str) {
-  std::stringstream ss;
-  int n = 0;
-  ss << str;
-  ss >> n;
-  return n;
-}
-
-inline uint64_t stoull(const string& str) {
-  std::stringstream ss;
-  uint64_t n = 0;
-  ss << str;
-  ss >> n;
-  return n;
-}
-
-inline double stod(const string& str, std::size_t* pos = 0) {
-  std::stringstream ss;
-  ss << str;
-  double val = 0;
-  ss >> val;
-  if (pos) {
-    if (ss.tellg() == std::streampos(-1)) {
-      *pos = str.size();
-    } else {
-      *pos = ss.tellg();
-    }
-  }
-  return val;
-}
-#else
-#define CAFFE2_TESTONLY_WE_ARE_USING_CUSTOM_STRING_FUNCTIONS 0
-using std::to_string;
-using std::stoi;
-using std::stoull;
-using std::stod;
-#endif // defined(__ANDROID__) || defined(CAFFE2_FORCE_STD_STRING_FALLBACK_TEST)
-
 #if defined(__ANDROID__) && !defined(__NDK_MAJOR__)
 using ::round;
 #else
@@ -238,6 +189,6 @@ CAFFE2_API void SetHipRuntimeFlag();
 // CMake)
 CAFFE2_API const std::map<string, string>& GetBuildOptions();
 
-}  // namespace caffe2
+} // namespace caffe2
 
 #endif  // CAFFE2_CORE_COMMON_H_
index 8900a78..dfada6d 100644 (file)
@@ -17,7 +17,7 @@ TEST(CommonTest, TestStoi) {
   EXPECT_TRUE(CAFFE2_TESTONLY_WE_ARE_USING_CUSTOM_STRING_FUNCTIONS);
   string s = "1234";
   int i_std = std::stoi(s);
-  int i_caffe2 = ::caffe2::stoi(s);
+  int i_caffe2 = ::c10::stoi(s);
   EXPECT_EQ(i_std, i_caffe2);
 }
 
@@ -26,14 +26,14 @@ TEST(CommonTest, TestStod) {
   string s = "1.234";
   std::size_t p_std = 0, p_caffe2 = 0;
   double d_std = std::stod(s, &p_std);
-  double d_caffe2 = ::caffe2::stod(s, &p_caffe2);
+  double d_caffe2 = ::c10::stod(s, &p_caffe2);
   EXPECT_EQ(d_std, d_caffe2);
   EXPECT_EQ(p_std, p_caffe2);
 
   // Only part of the string is parsed.
   s = "1.234 5.678";
   d_std = std::stod(s, &p_std);
-  d_caffe2 = ::caffe2::stod(s, &p_caffe2);
+  d_caffe2 = ::c10::stod(s, &p_caffe2);
   EXPECT_EQ(d_std, d_caffe2);
   EXPECT_EQ(p_std, p_caffe2);
 }
index 87633fa..c391651 100644 (file)
@@ -67,7 +67,7 @@ NetDef optimize_inference_net(
 
           // Safety check to prevent double-memongering nets.
           string shared_blob =
-              "__m" + caffe2::to_string(renaming.size()) + "_shared";
+              "__m" + c10::to_string(renaming.size()) + "_shared";
           if (all_blobs.find(shared_blob) != all_blobs.end()) {
             LOG(INFO) << "Net was already memongered!";
             return net;
@@ -211,7 +211,7 @@ class ComputeBlobRecyclingForDag {
         if (renamed.find(mapped_blob.second) == renamed.end()) {
           renamed.insert(
               {mapped_blob.second,
-               namescope + "__m" + caffe2::to_string(name_idx++) + "_shared"});
+               namescope + "__m" + c10::to_string(name_idx++) + "_shared"});
         }
       } else {
         renamed.insert({mapped_blob.second, mapped_blob.second});
index 894277e..e0ad1bc 100644 (file)
@@ -182,10 +182,10 @@ TaskThreadPoolBase* AsyncNetBase::pool(const DeviceOption& device_option) {
     auto gpu_id = device_option.device_id();
     CAFFE_ENFORCE(
         gpu_id >= 0 && gpu_id < FLAGS_caffe2_net_async_max_gpus,
-        "Invalid GPU id: " + caffe2::to_string(gpu_id));
+        "Invalid GPU id: " + c10::to_string(gpu_id));
     return poolGetter(gpu_pools_, device_type, gpu_id, num_workers_);
   } else {
-    CAFFE_THROW("Unsupported device type " + caffe2::to_string(device_type));
+    CAFFE_THROW("Unsupported device type " + c10::to_string(device_type));
   }
 }
 
@@ -194,7 +194,7 @@ int AsyncNetBase::stream(int task_id) {
   int stream_id = 0;
   if (IsGPUDeviceType(device_option.device_type())) {
     int gpu_id = device_option.device_id();
-    CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + caffe2::to_string(gpu_id));
+    CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + c10::to_string(gpu_id));
     if ((unsigned)gpu_id >= getStreamCounters().size()) {
       getStreamCounters().resize(gpu_id + 1, 0);
     }
index 674b88e..3112a09 100644 (file)
@@ -111,7 +111,7 @@ int AsyncDAGNet::stream(const DeviceOption& device_option) {
   int stream_id = 0;
   if (device_option.device_type() == PROTO_CUDA) {
     int gpu_id = device_option.device_id();
-    CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + caffe2::to_string(gpu_id));
+    CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + c10::to_string(gpu_id));
     if ((unsigned)gpu_id >= stream_counters_.size()) {
       stream_counters_.resize(gpu_id + 1, 0);
     }
index aebd936..bbca76c 100644 (file)
@@ -64,7 +64,7 @@ Tracer::Tracer(
       config_(config) {
   std::replace(filename_.begin(), filename_.end(), '/', '_');
   filename_ = this->config().filepath + "/" + filename_ + "_id_" +
-      caffe2::to_string(getCounterForNetName(net_name));
+      c10::to_string(getCounterForNetName(net_name));
   timer_.Start();
 }
 
@@ -81,7 +81,7 @@ std::string Tracer::opTraceName(const OperatorBase* op) {
   int unique_shard_id =
       op->has_debug_def() ? getUniqueShardId(op->debug_def()) : -1;
   if (unique_shard_id != -1) {
-    return op->type() + ":" + caffe2::to_string(unique_shard_id);
+    return op->type() + ":" + c10::to_string(unique_shard_id);
   } else {
     return op->type();
   }
@@ -366,7 +366,7 @@ int extractShardId(const std::string& name) {
     while (right_pos < name.length() && isdigit(name[right_pos])) {
       right_pos++;
     }
-    return caffe2::stoi(name.substr(left_pos, right_pos - left_pos));
+    return c10::stoi(name.substr(left_pos, right_pos - left_pos));
   } else {
     return -1;
   }
@@ -463,7 +463,7 @@ bool startIter(const std::shared_ptr<Tracer>& tracer) {
   tracer->setEnabled(is_enabled);
   if (should_dump) {
     int dumping_iter = tracer->bumpDumpingIter();
-    tracer->dumpTracingResultAndClearEvents(caffe2::to_string(dumping_iter));
+    tracer->dumpTracingResultAndClearEvents(c10::to_string(dumping_iter));
   }
   return is_enabled;
 }
index 0a59c93..953f162 100644 (file)
@@ -26,7 +26,7 @@ void NUMABind(int numa_node_id) {
 
   CAFFE_ENFORCE(
       numa_node_id <= numa_max_node(),
-      "NUMA node id " + caffe2::to_string(numa_node_id) + " is unavailable");
+      "NUMA node id " + c10::to_string(numa_node_id) + " is unavailable");
 
   auto bm = numa_allocate_nodemask();
   numa_bitmask_clearall(bm);
index 25fe290..4af9328 100644 (file)
@@ -2,6 +2,13 @@
 
 namespace caffe2 {
 
+template <>
+bool ImageInputOp<CPUContext>::ApplyTransformOnGPU(
+    const std::vector<std::int64_t>&,
+    const c10::Device&) {
+  return false;
+}
+
 REGISTER_CPU_OPERATOR(ImageInput, ImageInputOp<CPUContext>);
 
 OPERATOR_SCHEMA(ImageInput)
index 4aa96b5..321d017 100644 (file)
@@ -83,6 +83,9 @@ class ImageInputOp final
   void DecodeAndTransposeOnly(
       const std::string& value, uint8_t *image_data, int item_id,
       const int channels, std::size_t thread_index);
+  bool ApplyTransformOnGPU(
+      const std::vector<std::int64_t>& dims,
+      const c10::Device& type);
 
   unique_ptr<db::DBReader> owned_reader_;
   const db::DBReader* reader_;
@@ -1206,7 +1209,7 @@ bool ImageInputOp<Context>::Prefetch() {
       max_decode_error_ratio_) {
     throw std::runtime_error(
         "max_decode_error_ratio exceeded " +
-        caffe2::to_string(max_decode_error_ratio_));
+        c10::to_string(max_decode_error_ratio_));
   }
 
   // If the context is not CPUContext, we will need to do a copy in the
@@ -1267,22 +1270,10 @@ bool ImageInputOp<Context>::CopyPrefetched() {
       const int N = X.dim32(0), C = X.dim32(3), H = X.dim32(1), W = X.dim32(2);
       // data goes out as NCHW
       auto dims = std::vector<int64_t>{N, C, H, W};
-      // GPU transform kernel allows explicitly setting output type
-      if (output_type_ == TensorProto_DataType_FLOAT) {
-        auto* image_output = OperatorBase::OutputTensor(
-            0, dims, at::dtype<float>().device(type));
-        TransformOnGPU<uint8_t,float,Context>(prefetched_image_on_device_,
-                                              image_output, mean_gpu_,
-                                              std_gpu_, &context_);
-      } else if (output_type_ == TensorProto_DataType_FLOAT16) {
-        auto* image_output = OperatorBase::OutputTensor(
-            0, dims, at::dtype<at::Half>().device(type));
-        TransformOnGPU<uint8_t,at::Half,Context>(prefetched_image_on_device_,
-                                                image_output, mean_gpu_,
-                                                std_gpu_, &context_);
-      }  else {
+      if (!ApplyTransformOnGPU(dims, type)) {
         return false;
       }
+
     } else {
       OperatorBase::OutputTensorCopyFrom(
           0, type, prefetched_image_on_device_, &context_);
index c69889c..56d2f3d 100644 (file)
@@ -4,6 +4,35 @@
 
 namespace caffe2 {
 
+template <>
+bool ImageInputOp<CUDAContext>::ApplyTransformOnGPU(
+    const std::vector<std::int64_t>& dims,
+    const c10::Device& type) {
+  // GPU transform kernel allows explicitly setting output type
+  if (output_type_ == TensorProto_DataType_FLOAT) {
+    auto* image_output =
+        OperatorBase::OutputTensor(0, dims, at::dtype<float>().device(type));
+    TransformOnGPU<uint8_t, float, CUDAContext>(
+        prefetched_image_on_device_,
+        image_output,
+        mean_gpu_,
+        std_gpu_,
+        &context_);
+  } else if (output_type_ == TensorProto_DataType_FLOAT16) {
+    auto* image_output =
+        OperatorBase::OutputTensor(0, dims, at::dtype<at::Half>().device(type));
+    TransformOnGPU<uint8_t, at::Half, CUDAContext>(
+        prefetched_image_on_device_,
+        image_output,
+        mean_gpu_,
+        std_gpu_,
+        &context_);
+  } else {
+    return false;
+  }
+  return true;
+}
+
 REGISTER_CUDA_OPERATOR(ImageInput, ImageInputOp<CUDAContext>);
 
 }  // namespace caffe2
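
The image_input_op changes above replace the inline CUDA-specific branch in CopyPrefetched() with a per-context ApplyTransformOnGPU() specialization: the CPU specialization simply returns false, while the CUDA one performs the transform, so a CPU-only build never references GPU symbols. In outline (simplified, hypothetical types, not part of this diff):

  struct CPUContext {};
  struct CUDAContext {};

  template <class Context>
  struct MyOp {
    bool ApplyTransformOnGPU();  // declared for every context, defined per context
  };

  template <>
  bool MyOp<CPUContext>::ApplyTransformOnGPU() {
    return false;  // CPU build: no GPU transform available
  }

  template <>
  bool MyOp<CUDAContext>::ApplyTransformOnGPU() {
    // would launch the GPU transform here
    return true;
  }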
index 5676521..c042602 100644 (file)
@@ -16,14 +16,18 @@ TEST(OPENGLOperatorTest, Concat) {
     int H = 8;
     int W = 8;
     for (int i = 0; i < Cs.size(); ++i) {
-      PopulateCPUBlob(&ws, true, std::string("cpu_X") + caffe2::to_string(i), {batchSize, Cs[i], H, W});
+      PopulateCPUBlob(
+          &ws,
+          true,
+          std::string("cpu_X") + c10::to_string(i),
+          {batchSize, Cs[i], H, W});
     }
 
   NetDef cpu_net;
   {
     OperatorDef* def = AddOp(&cpu_net, "Concat", {}, {"ref_Y", "cpu_dummy"});
       for (int i = 0; i < Cs.size(); ++i ) {
-        def->add_input(std::string("cpu_X") + caffe2::to_string(i));
+        def->add_input(std::string("cpu_X") + c10::to_string(i));
       }
   }
 
@@ -33,7 +37,7 @@ TEST(OPENGLOperatorTest, Concat) {
     OperatorDef* def = AddOp(&gpu_net, "Concat", {}, {"gpu_Y", "gpu_dummy"});
     MAKE_OPENGL_OPERATOR(def);
     for (int i = 0; i < Cs.size(); ++i ) {
-      def->add_input(std::string("cpu_X") + caffe2::to_string(i));
+      def->add_input(std::string("cpu_X") + c10::to_string(i));
     }
   }
 
index 5da06de..a3d8bfc 100644 (file)
@@ -19,18 +19,24 @@ class GLConcat : public GLFilter {
   binding* input_tile_x;
 
   GLConcat(tile_descriptor output_tile_geometries, bool tiling = false)
-      : GLFilter("GLConcat",
-                 vertex_shader,
-                 fragment_shader,
-                 std::vector<binding*>(
-                     {BINDING(outputSize), BINDING(inputData), BINDING(inputTileRange), BINDING(input_tile_x)}),
-                 {/* no uniform blocks */},
-                 {/* no attributes */},
-                 {{"TILING", caffe2::to_string(tiling)},
-                  {"OUTPUT_TILES", caffe2::to_string(output_tile_geometries.tiles)},
-                  {"OUTPUT_TILE_X", caffe2::to_string(output_tile_geometries.tile_dims.x)},
-                  {"OUTPUT_TILE_WIDTH", caffe2::to_string(output_tile_geometries.tile_size.x)},
-                  {"OUTPUT_TILE_HEIGHT", caffe2::to_string(output_tile_geometries.tile_size.y)}}),
+      : GLFilter(
+            "GLConcat",
+            vertex_shader,
+            fragment_shader,
+            std::vector<binding*>({BINDING(outputSize),
+                                   BINDING(inputData),
+                                   BINDING(inputTileRange),
+                                   BINDING(input_tile_x)}),
+            {/* no uniform blocks */},
+            {/* no attributes */},
+            {{"TILING", c10::to_string(tiling)},
+             {"OUTPUT_TILES", c10::to_string(output_tile_geometries.tiles)},
+             {"OUTPUT_TILE_X",
+              c10::to_string(output_tile_geometries.tile_dims.x)},
+             {"OUTPUT_TILE_WIDTH",
+              c10::to_string(output_tile_geometries.tile_size.x)},
+             {"OUTPUT_TILE_HEIGHT",
+              c10::to_string(output_tile_geometries.tile_size.y)}}),
         tiling_(tiling) {}
 
   template <typename T>
index fe6e6a0..e6713a8 100644 (file)
@@ -76,47 +76,42 @@ class GLConvolution : public GLFilter {
                 _output_tile_batch_size,
                 _prelu_scale != nullptr),
             {/* no attributes */},
-            {{"KERNEL_SIZE_X", caffe2::to_string(_geometry.kernel_size.x)},
-             {"KERNEL_SIZE_Y", caffe2::to_string(_geometry.kernel_size.y)},
-             {"INPUT_BATCH_SIZE", caffe2::to_string(_input_batch_size)},
-             {"OUTPUT_BATCH_SIZE", caffe2::to_string(_output_batch_size)},
-             {"INPUT_TILES", caffe2::to_string(_input_tiles)},
-             {"OUTPUT_TILES", caffe2::to_string(_output_tiles)},
-             {"INPUT_TILE_WIDTH",
-              caffe2::to_string(_geometry.input_tile_size.x)},
-             {"INPUT_TILE_HEIGHT",
-              caffe2::to_string(_geometry.input_tile_size.y)},
+            {{"KERNEL_SIZE_X", c10::to_string(_geometry.kernel_size.x)},
+             {"KERNEL_SIZE_Y", c10::to_string(_geometry.kernel_size.y)},
+             {"INPUT_BATCH_SIZE", c10::to_string(_input_batch_size)},
+             {"OUTPUT_BATCH_SIZE", c10::to_string(_output_batch_size)},
+             {"INPUT_TILES", c10::to_string(_input_tiles)},
+             {"OUTPUT_TILES", c10::to_string(_output_tiles)},
+             {"INPUT_TILE_WIDTH", c10::to_string(_geometry.input_tile_size.x)},
+             {"INPUT_TILE_HEIGHT", c10::to_string(_geometry.input_tile_size.y)},
              {"OUTPUT_TILE_WIDTH",
-              caffe2::to_string(_geometry.output_tile_size.x)},
+              c10::to_string(_geometry.output_tile_size.x)},
              {"OUTPUT_TILE_HEIGHT",
-              caffe2::to_string(_geometry.output_tile_size.y)},
-             {"INPUT_TILE_X",
-              caffe2::to_string(_geometry.input_tile_grid_size.x)},
+              c10::to_string(_geometry.output_tile_size.y)},
+             {"INPUT_TILE_X", c10::to_string(_geometry.input_tile_grid_size.x)},
              {"OUTPUT_TILE_X",
-              caffe2::to_string(_geometry.output_tile_grid_size.x)},
-             {"INPUT_TILE_CHUNK_SIZE",
-              caffe2::to_string(_input_tile_chunk_size)},
+              c10::to_string(_geometry.output_tile_grid_size.x)},
+             {"INPUT_TILE_CHUNK_SIZE", c10::to_string(_input_tile_chunk_size)},
              {"OUTPUT_TILE_CHUNK_SIZE",
-              caffe2::to_string(_output_tile_chunk_size)},
+              c10::to_string(_output_tile_chunk_size)},
              {"OUTPUT_TILE_BATCH_SIZE",
-              caffe2::to_string(_output_tile_batch_size)},
-             {"TILED_CONVOLUTION", caffe2::to_string(_tiling)},
+              c10::to_string(_output_tile_batch_size)},
+             {"TILED_CONVOLUTION", c10::to_string(_tiling)},
              {"INPUT_PADDING_X",
-              caffe2::to_string(
+              c10::to_string(
                   _geometry.transposed
                       ? _geometry.kernel_size.x - 1 - _geometry.input_padding.x
                       : _geometry.input_padding.x)},
              {"INPUT_PADDING_Y",
-              caffe2::to_string(
+              c10::to_string(
                   _geometry.transposed
                       ? _geometry.kernel_size.y - 1 - _geometry.input_padding.y
                       : _geometry.input_padding.y)},
-             {"INPUT_STRIDE_X", caffe2::to_string(_geometry.input_stride.x)},
-             {"INPUT_STRIDE_Y", caffe2::to_string(_geometry.input_stride.y)},
-             {"TRANSPOSED_CONVOLUTION",
-              caffe2::to_string(_geometry.transposed)},
+             {"INPUT_STRIDE_X", c10::to_string(_geometry.input_stride.x)},
+             {"INPUT_STRIDE_Y", c10::to_string(_geometry.input_stride.y)},
+             {"TRANSPOSED_CONVOLUTION", c10::to_string(_geometry.transposed)},
              {"BOUNDS_CHECK_MODE",
-              caffe2::to_string(bounds_check_mode(_tiling, _geometry))}}),
+              c10::to_string(bounds_check_mode(_tiling, _geometry))}}),
         kernel(_kernel),
         bias(_bias),
         prelu_scale(_prelu_scale),
@@ -176,14 +171,13 @@ class GLConvolution : public GLFilter {
 
     for (int i = 0; i < input_batch_size; i++) {
       bindings.push_back(
-          inputData[i] =
-              new binding{"inputData[" + caffe2::to_string(i) + "]"});
+          inputData[i] = new binding{"inputData[" + c10::to_string(i) + "]"});
     }
 
     for (int i = 0; i < output_batch_size; i++) {
       bindings.push_back(
           previousData[i] =
-              new binding{"previousData[" + caffe2::to_string(i) + "]"});
+              new binding{"previousData[" + c10::to_string(i) + "]"});
     }
 
     return bindings;
@@ -203,7 +197,7 @@ class GLConvolution : public GLFilter {
          i++) {
       bindings.push_back(
           kernel_block[i] =
-              new binding{"Kernel_block[" + caffe2::to_string(i) + "]"});
+              new binding{"Kernel_block[" + c10::to_string(i) + "]"});
     }
 
     return bindings;
index 301b057..4927923 100644 (file)
@@ -35,14 +35,15 @@ class GLReduce : public GLFilter {
   }
 
   GLReduce(bool compute_inv_stdev_ = false, bool compute_norm_ = false)
-      : GLFilter("GLReduce",
-                 vertex_shader,
-                 fragment_shader,
-                 input_bindings(compute_norm_),
-                 {/* no uniform_blocks_bindings */},
-                 {/* no attributes */},
-                 {{"COMPUTE_INV_STDEV", caffe2::to_string((int)compute_inv_stdev_)},
-                  {"COMPUTE_NORM", caffe2::to_string((int)compute_norm_)}}),
+      : GLFilter(
+            "GLReduce",
+            vertex_shader,
+            fragment_shader,
+            input_bindings(compute_norm_),
+            {/* no uniform_blocks_bindings */},
+            {/* no attributes */},
+            {{"COMPUTE_INV_STDEV", c10::to_string((int)compute_inv_stdev_)},
+             {"COMPUTE_NORM", c10::to_string((int)compute_norm_)}}),
         compute_inv_stdev(compute_inv_stdev_),
         compute_norm(compute_norm_) {}
 
@@ -208,18 +209,20 @@ class GLScale : public GLFilter {
     return bindings;
   }
 
-  GLScale(const int _channels,
-          const float* _scale,
-          const float* _bias,
-          const float* _prelu_scale = nullptr,
-          const int _prelu_size = 0)
-      : GLFilter("GLScale",
-                 vertex_shader,
-                 fragment_shader,
-                 input_bindings(_prelu_scale != nullptr),
-                 {/* no uniform blocks */},
-                 {/* no attributes */},
-                 {{"FUSE_PRELU", caffe2::to_string(_prelu_scale != nullptr)}}),
+  GLScale(
+      const int _channels,
+      const float* _scale,
+      const float* _bias,
+      const float* _prelu_scale = nullptr,
+      const int _prelu_size = 0)
+      : GLFilter(
+            "GLScale",
+            vertex_shader,
+            fragment_shader,
+            input_bindings(_prelu_scale != nullptr),
+            {/* no uniform blocks */},
+            {/* no attributes */},
+            {{"FUSE_PRELU", c10::to_string(_prelu_scale != nullptr)}}),
         channels(_channels),
         scale(_scale),
         bias(_bias),
index 2d9d06a..833c6ff 100644 (file)
@@ -39,14 +39,13 @@ class GLPRelu : public GLFilter {
             std::vector<binding*>({BINDING(inputData)}),
             std::vector<binding*>({BINDING(scale_block)}),
             {/* no attributes */},
-            {{"USE_RELU", caffe2::to_string(PRelu)},
-             {"OUTPUT_TILES",
-              caffe2::to_string(_output_tile_x * _output_tile_y)},
-             {"OUTPUT_TILE_X", caffe2::to_string(_output_tile_x)},
-             {"OUTPUT_TILE_WIDTH", caffe2::to_string(_output_tile_width)},
-             {"OUTPUT_TILE_HEIGHT", caffe2::to_string(_output_tile_height)},
+            {{"USE_RELU", c10::to_string(PRelu)},
+             {"OUTPUT_TILES", c10::to_string(_output_tile_x * _output_tile_y)},
+             {"OUTPUT_TILE_X", c10::to_string(_output_tile_x)},
+             {"OUTPUT_TILE_WIDTH", c10::to_string(_output_tile_width)},
+             {"OUTPUT_TILE_HEIGHT", c10::to_string(_output_tile_height)},
              {"TILED_PRELU",
-              caffe2::to_string(_output_tile_x > 1 || _output_tile_y > 1)}}),
+              c10::to_string(_output_tile_x > 1 || _output_tile_y > 1)}}),
         scale(_scale),
         scale_size(_scale_size),
         channels(_channels),
@@ -56,18 +55,19 @@ class GLPRelu : public GLFilter {
         output_tile_height(_output_tile_height) {}
 
   GLPRelu(const int _channels)
-      : GLFilter("GLRelu",
-                 vertex_shader,
-                 fragment_shader,
-                 std::vector<binding*>({BINDING(inputData)}),
-                 {/* no uniform blocks */},
-                 {/* no attributes */},
-                 {{"USE_RELU", caffe2::to_string(Relu)},
-                  {"OUTPUT_TILES", caffe2::to_string(1)},
-                  {"OUTPUT_TILE_X", caffe2::to_string(1)},
-                  {"OUTPUT_TILE_WIDTH", caffe2::to_string(1)},
-                  {"OUTPUT_TILE_HEIGHT", caffe2::to_string(1)},
-                  {"TILED_PRELU", caffe2::to_string(0)}}),
+      : GLFilter(
+            "GLRelu",
+            vertex_shader,
+            fragment_shader,
+            std::vector<binding*>({BINDING(inputData)}),
+            {/* no uniform blocks */},
+            {/* no attributes */},
+            {{"USE_RELU", c10::to_string(Relu)},
+             {"OUTPUT_TILES", c10::to_string(1)},
+             {"OUTPUT_TILE_X", c10::to_string(1)},
+             {"OUTPUT_TILE_WIDTH", c10::to_string(1)},
+             {"OUTPUT_TILE_HEIGHT", c10::to_string(1)},
+             {"TILED_PRELU", c10::to_string(0)}}),
         scale(nullptr),
         scale_block(nullptr),
         scale_size(0),
index 5f4426f..d293745 100644 (file)
@@ -42,23 +42,21 @@ class GLPool : public GLFilter {
             },
             {/* no uniform blocks */},
             {/* no attributes */},
-            {{"KERNEL_SIZE_X", caffe2::to_string(_geometry.kernel_size.x)},
-             {"KERNEL_SIZE_Y", caffe2::to_string(_geometry.kernel_size.y)},
-             {"INPUT_PADDING_X", caffe2::to_string(_geometry.input_padding.x)},
-             {"INPUT_PADDING_Y", caffe2::to_string(_geometry.input_padding.y)},
-             {"INPUT_STRIDE_X", caffe2::to_string(_geometry.input_stride.x)},
-             {"INPUT_STRIDE_Y", caffe2::to_string(_geometry.input_stride.y)},
-             {"INPUT_TILE_WIDTH",
-              caffe2::to_string(_geometry.input_tile_size.x)},
-             {"INPUT_TILE_HEIGHT",
-              caffe2::to_string(_geometry.input_tile_size.y)},
+            {{"KERNEL_SIZE_X", c10::to_string(_geometry.kernel_size.x)},
+             {"KERNEL_SIZE_Y", c10::to_string(_geometry.kernel_size.y)},
+             {"INPUT_PADDING_X", c10::to_string(_geometry.input_padding.x)},
+             {"INPUT_PADDING_Y", c10::to_string(_geometry.input_padding.y)},
+             {"INPUT_STRIDE_X", c10::to_string(_geometry.input_stride.x)},
+             {"INPUT_STRIDE_Y", c10::to_string(_geometry.input_stride.y)},
+             {"INPUT_TILE_WIDTH", c10::to_string(_geometry.input_tile_size.x)},
+             {"INPUT_TILE_HEIGHT", c10::to_string(_geometry.input_tile_size.y)},
              {"OUTPUT_TILE_WIDTH",
-              caffe2::to_string(_geometry.output_tile_size.x)},
+              c10::to_string(_geometry.output_tile_size.x)},
              {"OUTPUT_TILE_HEIGHT",
-              caffe2::to_string(_geometry.output_tile_size.y)},
-             {"TILED_POOLING", caffe2::to_string(_tiling)},
-             {"MAX_POOL", caffe2::to_string(poolType == MaxPool)},
-             {"BOUNDS_CHECK_MODE", caffe2::to_string(1)}}),
+              c10::to_string(_geometry.output_tile_size.y)},
+             {"TILED_POOLING", c10::to_string(_tiling)},
+             {"MAX_POOL", c10::to_string(poolType == MaxPool)},
+             {"BOUNDS_CHECK_MODE", c10::to_string(1)}}),
         geometry(_geometry) {}
   ~GLPool() {}
 
index a8ac831..0188fab 100644 (file)
@@ -16,14 +16,15 @@ class GLSigmoid : public GLFilter {
   binding* outputSize;
 
   GLSigmoid(OpType opType)
-      : GLFilter("GLSigmoid",
-                 vertex_shader,
-                 fragment_shader,
-                 {BINDING(outputSize), BINDING(inputData)},
-                 {/* no uniform blocks */},
-                 {/* no attributes */},
-                 {{"SIGMOID", caffe2::to_string(opType == Sigmoid)},
-                  {"TANH", caffe2::to_string(opType == Tanh)}}) {}
+      : GLFilter(
+            "GLSigmoid",
+            vertex_shader,
+            fragment_shader,
+            {BINDING(outputSize), BINDING(inputData)},
+            {/* no uniform blocks */},
+            {/* no attributes */},
+            {{"SIGMOID", c10::to_string(opType == Sigmoid)},
+             {"TANH", c10::to_string(opType == Tanh)}}) {}
 
   template <typename T>
   void sigmoid(const GLImageVector<T>& input_images, const GLImageVector<T>& output_images);
index 0f120f8..0eb3d59 100644 (file)
@@ -42,9 +42,9 @@ class GLSoftmaxReduce : public GLFilter {
             input_bindings(),
             {/* no uniform_blocks_bindings */},
             {/* no attributes */},
-            {{"COMPUTE_SUM", caffe2::to_string((int)compute_sum_)},
-             {"INPUT_TILE_X", caffe2::to_string(input_tile_x)},
-             {"TILED_SOFTMAX", caffe2::to_string(int(tiled))}}) {}
+            {{"COMPUTE_SUM", c10::to_string((int)compute_sum_)},
+             {"INPUT_TILE_X", c10::to_string(input_tile_x)},
+             {"TILED_SOFTMAX", c10::to_string(int(tiled))}}) {}
 
   template <typename T>
   void reduce(const GLImage<T>* input_image,
@@ -190,8 +190,8 @@ class GLSoftmaxScale : public GLFilter {
             input_bindings(),
             {/* no uniform blocks */},
             {/* no attributes */},
-            {{"COMPUTE_EXP", caffe2::to_string((int)_compute_exp)},
-             {"TILED_SOFTMAX", caffe2::to_string((int)tiled)}}) {}
+            {{"COMPUTE_EXP", c10::to_string((int)_compute_exp)},
+             {"TILED_SOFTMAX", c10::to_string((int)tiled)}}) {}
 
   template <typename T>
   void scale(const GLImage<T>* input_image,
index af3f8ac..a6c32a5 100644 (file)
@@ -19,13 +19,18 @@ class GLStylizer : public GLFilter {
 
  public:
   GLStylizer(bool _deprocess = false, InputFormat input_format = BGRA)
-      : GLFilter(_deprocess ? "GLDeStylizer" : "GLStylizer",
-                 vertex_shader,
-                 fragment_shader,
-                 std::vector<binding*>({BINDING(inputData), BINDING(mean), BINDING(noise_std), BINDING(outputSize)}),
-                 {/* no uniform blocks */},
-                 {/* no attributes */},
-                 {{"DEPROCESS", caffe2::to_string(_deprocess)}, {"RGBAINPUT", caffe2::to_string(input_format)}}),
+      : GLFilter(
+            _deprocess ? "GLDeStylizer" : "GLStylizer",
+            vertex_shader,
+            fragment_shader,
+            std::vector<binding*>({BINDING(inputData),
+                                   BINDING(mean),
+                                   BINDING(noise_std),
+                                   BINDING(outputSize)}),
+            {/* no uniform blocks */},
+            {/* no attributes */},
+            {{"DEPROCESS", c10::to_string(_deprocess)},
+             {"RGBAINPUT", c10::to_string(input_format)}}),
         deprocess(_deprocess) {}
 
   template <typename T1, typename T2>
index 690a33c..c8e5891 100644 (file)
@@ -814,8 +814,8 @@ void testOpenGLConcat(int N, std::vector<int> Cs, int H, int W, bool tiling = fa
             << "H: " << H << ", W: " << W;
   Workspace ws;
   for (int i = 0; i < Cs.size(); i++) {
-    auto* t = BlobGetMutableTensor(
-        ws.CreateBlob("X_cpu" + caffe2::to_string(i)), CPU);
+    auto* t =
+        BlobGetMutableTensor(ws.CreateBlob("X_cpu" + c10::to_string(i)), CPU);
     t->Resize(N, Cs[i], H, W);
     CPUContext ctx0;
     // Too noisy.
@@ -826,8 +826,8 @@ void testOpenGLConcat(int N, std::vector<int> Cs, int H, int W, bool tiling = fa
   for (int i = 0; i < Cs.size(); i++) {
     auto& op = *(netdef.add_op());
     op.set_type("CopyToOpenGL");
-    op.add_input("X_cpu" + caffe2::to_string(i));
-    op.add_output("X_gl" + caffe2::to_string(i));
+    op.add_input("X_cpu" + c10::to_string(i));
+    op.add_output("X_gl" + c10::to_string(i));
     if (tiling) {
       int tile_x = 1, tile_y = 1;
       computeOutputTiles(Cs[i], tile_x, tile_y);
@@ -849,7 +849,7 @@ void testOpenGLConcat(int N, std::vector<int> Cs, int H, int W, bool tiling = fa
     auto& op = *(netdef.add_op());
     op.set_type("OpenGLConcat");
     for (int i = 0; i < Cs.size(); i++) {
-      op.add_input("X_gl" + caffe2::to_string(i));
+      op.add_input("X_gl" + c10::to_string(i));
     }
     {
       auto& arg = *(op.add_arg());
@@ -871,7 +871,7 @@ void testOpenGLConcat(int N, std::vector<int> Cs, int H, int W, bool tiling = fa
     auto& op = *(netdef.add_op());
     op.set_type("Concat");
     for (int i = 0; i < Cs.size(); i++) {
-      op.add_input("X_cpu" + caffe2::to_string(i));
+      op.add_input("X_cpu" + c10::to_string(i));
     }
     auto& arg = *(op.add_arg());
     arg.set_name("order");
index 732a040..c309f6f 100644
@@ -13,9 +13,9 @@ std::string RunCountNetObserver::debugInfo() {
 #if CAFFE2_ANDROID
   // workaround
   int foo = cnt_;
-  return "This operator runs " + caffe2::to_string(foo) + " times.";
+  return "This operator runs " + c10::to_string(foo) + " times.";
 #else
-  return "This operator runs " + caffe2::to_string(cnt_) + " times.";
+  return "This operator runs " + c10::to_string(cnt_) + " times.";
 #endif
 }
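
The copy into foo above is deliberate rather than redundant. Assuming cnt_ is a std::atomic counter, a by-value template like the Android to_string fallback sketched earlier would deduce T as the atomic type and fail to compile, since atomics are non-copyable; loading into a plain int first sidesteps that. A minimal sketch of the pattern:

    #include <atomic>
    #include <string>

    // Assumes the c10::to_string shim sketched earlier in this document.
    std::string readCount(const std::atomic<int>& cnt) {
      int foo = cnt;                // explicit load into a plain int
      return c10::to_string(foo);   // a template to_string(T) would otherwise
                                    // try to copy the non-copyable atomic
    }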
 
index 7058092..88f5bd6 100644
@@ -557,7 +557,7 @@ Caffe2Ops Caffe2Backend::CreatePadPool(
       bool pads_flag = false;
       str += "[";
       for (const auto& i : pads) {
-        str += caffe2::to_string(i) + ",";
+        str += c10::to_string(i) + ",";
         pads_flag = pads_flag || i > 0;
       }
       str += "]";
index 1a0031f..52919b6 100644
@@ -40,7 +40,11 @@ file(GLOB tmp *.cc)
 file(GLOB tmp_cudnn *_cudnn.cc)
 exclude(tmp "${tmp}" ${tmp_cudnn})
 set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${tmp})
-file(GLOB_RECURSE tmp experimental/c10/*.cc)
+
+if (BUILD_C10_EXPERIMENTAL_OPS)
+    file(GLOB_RECURSE tmp experimental/c10/*.cc)
+endif()
+
 set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} ${tmp})
 # exclude test files and gpu files
 file(GLOB tmp *_test.cc)
index a5a9b86..83183e1 100644
@@ -35,7 +35,7 @@ class BBoxTransformOp final : public Operator<Context> {
     CAFFE_ENFORCE_EQ(
         weights_.size(),
         4,
-        "weights size " + caffe2::to_string(weights_.size()) + "must be 4.");
+        "weights size " + c10::to_string(weights_.size()) + "must be 4.");
   }
   USE_OPERATOR_CONTEXT_FUNCTIONS;
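
Unlike the variadic AT_ASSERTM calls later in this diff, CAFFE_ENFORCE_EQ here is handed a single pre-concatenated message, which is why each number goes through to_string. A minimal usage sketch (values illustrative; the check throws caffe2::EnforceNotMet on failure):

    #include <string>
    #include <vector>
    #include "caffe2/core/logging.h" // CAFFE_ENFORCE_EQ

    void weightsCheckExample() {
      std::vector<float> weights{1.f, 1.f, 1.f};
      // Throws caffe2::EnforceNotMet (3 != 4), with the concatenated
      // message appended to the file/line information.
      CAFFE_ENFORCE_EQ(
          weights.size(), 4,
          "weights size " + c10::to_string(weights.size()) + " must be 4.");
    }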
 
index 2ddd2db..6dda625 100644
@@ -65,15 +65,15 @@ class CollectAndDistributeFpnRpnProposalsOp final : public Operator<Context> {
     CAFFE_ENFORCE_GE(
         roi_max_level_,
         roi_min_level_,
-        "roi_max_level " + caffe2::to_string(roi_max_level_) +
+        "roi_max_level " + c10::to_string(roi_max_level_) +
             " must be greater than or equal to roi_min_level " +
-            caffe2::to_string(roi_min_level_) + ".");
+            c10::to_string(roi_min_level_) + ".");
     CAFFE_ENFORCE_GE(
         rpn_max_level_,
         rpn_min_level_,
-        "rpn_max_level " + caffe2::to_string(rpn_max_level_) +
+        "rpn_max_level " + c10::to_string(rpn_max_level_) +
             " must be greater than or equal to rpn_min_level " +
-            caffe2::to_string(rpn_min_level_) + ".");
+            c10::to_string(rpn_min_level_) + ".");
   }
 
   ~CollectAndDistributeFpnRpnProposalsOp() {}
index 50167fa..a368842 100644
@@ -55,9 +55,9 @@ class DoOp final : public Operator<Context> {
           outer_blobs_idx[blob_idx] >= 0 &&
               outer_blobs_idx[blob_idx] < outer_blob_names.size(),
           "Invalid blob bindings: outer blob index (" +
-              caffe2::to_string(outer_blobs_idx[blob_idx]) + ", inner name: " +
+              c10::to_string(outer_blobs_idx[blob_idx]) + ", inner name: " +
               inner_blobs[blob_idx] + ") is out of bounds [0, " +
-              caffe2::to_string(outer_blob_names.size() - 1) + "]");
+              c10::to_string(outer_blob_names.size() - 1) + "]");
       const auto& outer_name = outer_blob_names[outer_blobs_idx[blob_idx]];
       CAFFE_ENFORCE(
           !used_outer_names.count(outer_name),
index fb1dc59..cb7a5f2 100644
@@ -324,7 +324,7 @@ bool HSoftmaxSearchOp<float, CPUContext>::extractNodes(
     info.emplace_back(std::make_pair(n.name(), node.scores(i++)));
   }
   for (const int n : node.word_ids()) {
-    info.emplace_back(std::make_pair(caffe2::to_string(n), node.scores(i++)));
+    info.emplace_back(std::make_pair(c10::to_string(n), node.scores(i++)));
   }
 
   for (const auto& n : node.children()) {
index d25b65d..4614b57 100644
@@ -34,7 +34,7 @@ class ONNXWhileOp final : public Operator<Context> {
         body_net_def_.set_name("loop_net");
       } else {
         ++counter;
-        body_net_def_.set_name("loop_net." + caffe2::to_string(counter));
+        body_net_def_.set_name("loop_net." + c10::to_string(counter));
       }
     }
   }
index 63d58f3..a4c3d45 100644
@@ -40,7 +40,7 @@ class RecurrentNetworkBlobFetcherOp final : public Operator<Context> {
         const auto& currentTensor = currentBlob->Get<Tensor>();
 
         std::string newBlobName =
-            prefix_ + std::string("_") + blob_name + caffe2::to_string(i);
+            prefix_ + std::string("_") + blob_name + c10::to_string(i);
         blob_names_vector.push_back(newBlobName);
 
         BlobGetMutableTensor(ws_->CreateBlob(newBlobName), CPU)
index 4cb53a6..3300f78 100644
@@ -110,7 +110,7 @@ class RecurrentNetworkExecutorBase {
       // avoid conflicting timestep blobs when reusing workspaces, as with
       // the forward-only mode.
       std::string this_timestep_blob =
-          timestep_blob_ + "_rnnexec_t" + caffe2::to_string(t);
+          timestep_blob_ + "_rnnexec_t" + c10::to_string(t);
       BlobGetMutableTensor(ws->CreateBlob(this_timestep_blob), CPU)->Resize(1);
       auto b = ws->GetBlob(this_timestep_blob);
       CAFFE_ENFORCE(b);
index 0b0b72d..254f917 100644
@@ -10,8 +10,7 @@ OpSchema::Cost CostInferenceForSparseLengths(
   CAFFE_ENFORCE_GE(
       inputs.size(),
       min_num_of_inputs,
-      def.type() + " requires at least " +
-          caffe2::to_string(min_num_of_inputs));
+      def.type() + " requires at least " + c10::to_string(min_num_of_inputs));
 
   const TensorShape data = inputs[0];
   const TensorShape indices = inputs[1 + use_weight];
index d8c036e..5715e5a 100644
@@ -44,8 +44,8 @@ void DumpGraph(NNGraph* g) {
     assert(node->data() && "Node doesn't have data, can't render it");
     if (isa<NeuralNetOperator>(node->data())) {
       auto* op = dyn_cast<NeuralNetOperator>(node->data().get());
-      labelMap["label"] = op->getName() + " (" +
-          caffe2::to_string((unsigned long long)node) + ")";
+      labelMap["label"] =
+          op->getName() + " (" + c10::to_string((unsigned long long)node) + ")";
       auto* annotation = op->getAnnotation();
       if (annotation && isa<Caffe2Annotation>(annotation)) {
         auto device_annotation = dyn_cast<Caffe2Annotation>(annotation);
@@ -60,8 +60,8 @@ void DumpGraph(NNGraph* g) {
     } else if (isa<Data>(node->data())) {
       auto tensor = dyn_cast<NeuralNetData>(node->data().get());
       labelMap["label"] = tensor->getName();
-      labelMap["label"] += "_" + caffe2::to_string(tensor->getVersion()) + " " +
-          caffe2::to_string((unsigned long long)node);
+      labelMap["label"] += "_" + c10::to_string(tensor->getVersion()) + " " +
+          c10::to_string((unsigned long long)node);
     }
     return labelMap;
   };
index 6335933..9605be3 100644
@@ -11,10 +11,10 @@ namespace {
   void AddConv(caffe2::NetDef* net, int tick) {
     auto* op = net->add_op();
     op->set_type("MyConv");
-    op->add_input("N" + caffe2::to_string(tick));
-    op->add_input("W" + caffe2::to_string(tick));
-    op->add_input("b" + caffe2::to_string(tick));
-    op->add_output("N" + caffe2::to_string(tick+1));
+    op->add_input("N" + c10::to_string(tick));
+    op->add_input("W" + c10::to_string(tick));
+    op->add_input("b" + c10::to_string(tick));
+    op->add_output("N" + c10::to_string(tick + 1));
   }
 
   bool Supports(const caffe2::OperatorDef& op) {
index e9da69a..ec87365 100644
@@ -16,7 +16,7 @@ TEST(Converter, Basic) {
       caffe2::OperatorDef *def = net.add_op();
       def->set_type("Conv");
       def->add_input("X");
-      def->add_input("W" + caffe2::to_string(i)); // different weights
+      def->add_input("W" + c10::to_string(i)); // different weights
       ADD_ARG(def, "kernel", i, 3);
       ADD_ARG(def, "stride", i, 1);
       ADD_ARG(def, "pad", i, 0);
@@ -42,8 +42,8 @@ TEST(Converter, UnknownType) {
   def->set_type("NeverSeen");
   def->add_input("X");
   def->add_output("X");
-  def->mutable_device_option()->set_node_name("device_" +
-      caffe2::to_string(rand() % 2));
+  def->mutable_device_option()->set_node_name(
+      "device_" + c10::to_string(rand() % 2));
   auto nn = caffe2::convertToNNModule(net);
   auto new_netdef = caffe2::convertToCaffe2Proto(nn);
 }
index 725516d..5dc14fa 100644
@@ -20,8 +20,8 @@ TEST(DeviceTest, InsertCopies) {
       caffe2::OperatorDef* def = net.add_op();
       def->set_type("Conv");
       def->add_input("X");
-      def->add_input("W" + caffe2::to_string(i));
-      def->add_input("b" + caffe2::to_string(i));
+      def->add_input("W" + c10::to_string(i));
+      def->add_input("b" + c10::to_string(i));
       ADD_ARG(def, "kernel", i, 3);
       ADD_ARG(def, "stride", i, 1);
       ADD_ARG(def, "pad", i, 0);
index 680cefe..5998776 100644
@@ -18,8 +18,8 @@ TEST(MobileTest, Convolution) {
       caffe2::OperatorDef* def = net.add_op();
       def->set_type("Conv");
       def->add_input("X");
-      def->add_input("W" + caffe2::to_string(i));
-      def->add_input("b" + caffe2::to_string(i));
+      def->add_input("W" + c10::to_string(i));
+      def->add_input("b" + c10::to_string(i));
       ADD_ARG(def, "kernel", i, 3);
       ADD_ARG(def, "stride", i, 1);
       ADD_ARG(def, "pad", i, 0);
index e8675dd..a0e6c6e 100644
@@ -67,14 +67,14 @@ DataRandomFiller::DataRandomFiller(
     const auto& op_types = input_types[i];
     CAFFE_ENFORCE(
         op_dims.size() == op.input_size(),
-        op.name() + " has " + caffe2::to_string(op.input_size()) +
+        op.name() + " has " + c10::to_string(op.input_size()) +
             " inputs; while the input dimension size is " +
-            caffe2::to_string(op_dims.size()));
+            c10::to_string(op_dims.size()));
     CAFFE_ENFORCE(
         op_types.size() == op.input_size(),
-        op.name() + " has " + caffe2::to_string(op.input_size()) +
+        op.name() + " has " + c10::to_string(op.input_size()) +
             " inputs; while the input type size is " +
-            caffe2::to_string(op_types.size()));
+            c10::to_string(op_types.size()));
 
     for (size_t j = 0; j < op.input_size(); ++j) {
       inputs_[op.input(j)] =
index 522e23f..78c394c 100644
@@ -33,10 +33,10 @@ class StdOutputFormatter : public OutputFormatter {
     auto mean = get_mean(durations_ms);
     auto throughput = iterations / (mean / MS_IN_SECOND);
     return std::string("\n\n====================================\n") +
-        "Predictor benchmark finished with " + caffe2::to_string(threads) +
-        " threads.\nThroughput:\t\t" + caffe2::to_string(throughput) +
+        "Predictor benchmark finished with " + c10::to_string(threads) +
+        " threads.\nThroughput:\t\t" + c10::to_string(throughput) +
         " iterations/s\nVariation:\t\t" +
-        caffe2::to_string(get_stdev(durations_ms) * 100 / mean) +
+        c10::to_string(get_stdev(durations_ms) * 100 / mean) +
         "%\n====================================";
   }
 };
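
For context on the quantities formatted above: throughput is iterations divided by mean latency in seconds, and "Variation" is the coefficient of variation (stdev/mean) as a percentage. The helpers are defined elsewhere in this class; a hypothetical implementation consistent with these call sites:

    #include <cmath>
    #include <numeric>
    #include <vector>

    // Hypothetical helpers; the names match the call sites above but the
    // bodies are assumptions, not code from this diff.
    static double get_mean(const std::vector<double>& v) {
      return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
    }
    static double get_stdev(const std::vector<double>& v) {
      const double m = get_mean(v);
      double sq = 0.0;
      for (const double d : v) {
        sq += (d - m) * (d - m);
      }
      return std::sqrt(sq / v.size());
    }
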
index 3f62edf..d5681e5 100644
@@ -105,8 +105,8 @@ class SafeEnqueueBlobsOp final : public Operator<Context> {
     auto size = queue->getNumBlobs();
     CAFFE_ENFORCE(
         OutputSize() == size + 1,
-        "Expected " + caffe2::to_string(size + 1) + ", " +
-            " got: " + caffe2::to_string(size));
+        "Expected " + c10::to_string(size + 1) + ", " +
+            " got: " + c10::to_string(size));
     bool status = queue->blockingWrite(this->Outputs());
     Output(size)->Resize();
     math::Set<bool, Context>(
index 37ed7f9..d233e1e 100644
@@ -112,7 +112,7 @@ class PyTorchStreamReader final {
         file_size_ % kFieldAlignment == 0,
         "File length is not a multiple of the alignment"
         " size. Is this a valid PyTorch model file? File size: ",
-        caffe2::to_string(file_size_));
+        c10::to_string(file_size_));
     readAndValidateFileHeader();
   }
 
@@ -209,9 +209,9 @@ class PyTorchStreamReader final {
     AT_ASSERTM(
         file_format_version >= kMinSupportedFileFormatVersion,
         "Attempted to read a PyTorch file with version ",
-        caffe2::to_string(file_format_version),
+        c10::to_string(file_format_version),
         ", but the minimum supported version for reading is ",
-        caffe2::to_string(kMinSupportedFileFormatVersion),
+        c10::to_string(kMinSupportedFileFormatVersion),
         ". Your PyTorch script module file is too old. Please re-export it again.");
     AT_ASSERTM(
         file_format_version <= kMaxSupportedFileFormatVersion,
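
The two AT_ASSERTM calls form a two-sided version gate. A compact illustration with placeholder bounds (the real kMin/kMax constants are defined elsewhere in this file):

    #include <cstdint>

    // Placeholder bounds, for illustration only.
    constexpr uint64_t kMinV = 1; // stands in for kMinSupportedFileFormatVersion
    constexpr uint64_t kMaxV = 2; // stands in for kMaxSupportedFileFormatVersion

    bool isReadable(uint64_t v) {
      // v < kMinV: the file is too old for this reader ("please re-export");
      // v > kMaxV: the file is newer than this reader supports.
      return v >= kMinV && v <= kMaxV;
    }
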
index 2587ccf..fa56fbd 100644
@@ -29,7 +29,7 @@ uint8_t* GetMutableData(int type_index, TensorCPU* tensor) {
   CAFFE_ENFORCE_EQ(
       gTypeMapper.count(type_index),
       1,
-      "Invalid type index " + caffe2::to_string(type_index) + ".");
+      "Invalid type index " + c10::to_string(type_index) + ".");
   return gTypeMapper.at(type_index)(tensor);
 }
 
index 6ca1cf6..e12be6a 100644
@@ -124,7 +124,7 @@ class CAFFE2_API PatternNetTransform : public Transform {
   bool argument_match_ = false;
 
   const string TransformBlobWrapper(const string& blob_name) {
-    return "transform/" + blob_name + "_" + caffe2::to_string(ssa_id_);
+    return "transform/" + blob_name + "_" + c10::to_string(ssa_id_);
   }
 
   int ssa_id_ = 0;
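
A usage sketch of the wrapper above: the SSA-style suffix keeps blob names from colliding when the same pattern is applied repeatedly. Free-function form, with an illustrative ssa_id:

    #include <string>

    // Assumes the c10::to_string shim sketched earlier in this document.
    std::string transformBlobWrapper(const std::string& blob_name, int ssa_id) {
      return "transform/" + blob_name + "_" + c10::to_string(ssa_id);
    }
    // transformBlobWrapper("conv_w", 2) == "transform/conv_w_2"
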
index a56539a..c02e6a9 100644
@@ -102,7 +102,7 @@ bool forkAndPipe(
     }));                                                                     \
     int keyPhraseCount = 0;                                                  \
     std::string keyPhrase =                                                  \
-        std::string(name) + "(" + caffe2::to_string(signum) + "), Thread";   \
+        std::string(name) + "(" + c10::to_string(signum) + "), Thread";      \
     size_t loc = 0;                                                          \
     while ((loc = stderrBuffer.find(keyPhrase, loc)) != std::string::npos) { \
       keyPhraseCount += 1;                                                   \
index be430fd..96d1b53 100644
@@ -421,6 +421,9 @@ if(USE_OPENCV)
   if(OpenCV_FOUND)
     include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS})
     list(APPEND Caffe2_DEPENDENCY_LIBS ${OpenCV_LIBS})
+    if (MSVC AND USE_CUDA)
+        list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS ${OpenCV_LIBS})
+    endif()
     message(STATUS "OpenCV found (${OpenCV_CONFIG_PATH})")
   else()
     message(WARNING "Not compiling with OpenCV. Suppress this warning with -DUSE_OPENCV=OFF")
@@ -933,12 +936,6 @@ if (CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
     set(ONNX_CUSTOM_PROTOC_EXECUTABLE ${CAFFE2_CUSTOM_PROTOC_EXECUTABLE})
   endif()
   set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
-  # We will build onnx as static libs and embed it directly into the binary.
-  if (MSVC AND BUILD_SHARED_LIBS)
-    # That also means we want to export all symbols from the shared
-    # library we are building
-    set(ONNX_BUILD_MAIN_LIB ON)
-  endif()
   set(BUILD_SHARED_LIBS OFF)
   set(ONNX_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME})
   set(ONNX_USE_LITE_PROTO ${CAFFE2_USE_LITE_PROTO})
index 6b48fa9..4b6e391 100644
@@ -1,9 +1,4 @@
-# ---[ Add modules
-# TODO(orionr): Enable Detectron ops for Windows DLL when we
-# can figure out how to get it to build
-if (NOT (MSVC AND BUILD_SHARED_LIBS))
-  add_subdirectory(detectron)
-endif()
+add_subdirectory(detectron)
 add_subdirectory(module_test)
 add_subdirectory(observers)
 add_subdirectory(rocksdb)
index b3341be..e00fe8c 100644
@@ -19,24 +19,22 @@ void NetObserverReporterPrint::report(
   for (auto& p : info) {
     if ((p.first == "NET_DELAY") && (info.size() == 1)) {
       // for Net_delay perf
-      caffe2_perf.push_back(
-          {{"type", "NET"},
-           {"value", caffe2::to_string(p.second.latency * 1000)},
-           {"unit", "us"},
-           {"metric", "latency"}});
+      caffe2_perf.push_back({{"type", "NET"},
+                             {"value", c10::to_string(p.second.latency * 1000)},
+                             {"unit", "us"},
+                             {"metric", "latency"}});
     } else if (p.first != "NET_DELAY") {
       // for operator perf
       std::string shape_str = get_tensor_shapes(p.second);
       std::string args_str = get_op_args(p.second);
 
-      caffe2_perf.push_back(
-          {{"type", p.first},
-           {"value", caffe2::to_string(p.second.latency * 1000)},
-           {"unit", "us"},
-           {"metric", "latency"}});
+      caffe2_perf.push_back({{"type", p.first},
+                             {"value", c10::to_string(p.second.latency * 1000)},
+                             {"unit", "us"},
+                             {"metric", "latency"}});
       if (p.second.flops > 0) {
         caffe2_perf.push_back({{"type", p.first},
-                               {"value", caffe2::to_string(p.second.flops)},
+                               {"value", c10::to_string(p.second.flops)},
                                {"unit", "flop"},
                                {"metric", "flops"}});
       }
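
A worked example of the value/unit pairing above, assuming latency is recorded in milliseconds (implied by the * 1000 alongside the "us" unit):

    #include <string>

    void latencyExample() {
      double latency = 2.5; // milliseconds (assumed)
      // std::to_string-style formatting of a double yields six decimals.
      std::string value = c10::to_string(latency * 1000); // "2500.000000"
      // Emitted as {"value": "2500.000000", "unit": "us", "metric": "latency"}.
    }
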
index 45a5949..0ef59ce 100644
@@ -167,7 +167,7 @@ caffe2::string PerfNetObserver::getObserverName(const OperatorBase* op, int idx)
                                                 : "NO_OUTPUT")
                            : "NO_DEF");
   caffe2::string name =
-      "ID_" + caffe2::to_string(idx) + "_" + opType + "_" + displayName;
+      "ID_" + c10::to_string(idx) + "_" + opType + "_" + displayName;
   return name;
 }
 
index d043413..aced0bc 100644
@@ -984,10 +984,10 @@ class ScriptModuleSerializer final {
       } else {
         record_id = writer_.writeRecord(tensor.storage().data(), record_size);
       }
-      external_data->set_record_id(caffe2::to_string(record_id));
+      external_data->set_record_id(c10::to_string(record_id));
       storageMap_[key] = record_id;
     } else {
-      external_data->set_record_id(caffe2::to_string(it->second));
+      external_data->set_record_id(c10::to_string(it->second));
     }
     // TODO handle device case, set the device_detail and load to CUDA device
   }
index 928912c..1598392 100644
@@ -473,7 +473,7 @@ class ScriptModuleDeserializer final {
     }
     auto type = at::typeMetaToScalarType(
         caffe2::DataTypeToTypeMeta(tensor_proto.data_type()));
-    uint64_t record_id = caffe2::stoull(external_data.record_id());
+    uint64_t record_id = c10::stoull(external_data.record_id());
     AT_ASSERT(record_id != 0);
     auto storage_it = storageMap_.find(record_id);
     if (storage_it == storageMap_.end()) {
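
The c10::stoull used here presumably follows the same shim pattern as c10::to_string; a plausible shape, with the Android branch an assumption rather than code from this commit:

    #include <cstdint>
    #include <sstream>
    #include <string>

    namespace c10 {
    #if defined(__ANDROID__)
    inline uint64_t stoull(const std::string& str) {
      std::stringstream ss(str);
      uint64_t n = 0;
      ss >> n;
      return n;
    }
    #else
    using std::stoull;
    #endif
    } // namespace c10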