Make C10_MOBILE consistent with how feature macros are usually used (#17481)

author Sebastian Messmer <messmer@fb.com>

Thu, 28 Feb 2019 01:54:51 +0000 (17:54 -0800)

committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>

Thu, 28 Feb 2019 01:57:51 +0000 (17:57 -0800)
author Sebastian Messmer <messmer@fb.com>
Thu, 28 Feb 2019 01:54:51 +0000 (17:54 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Thu, 28 Feb 2019 01:57:51 +0000 (17:57 -0800)
diff --git a/aten/src/ATen/core/LegacyTypeDispatch.cpp b/aten/src/ATen/core/LegacyTypeDispatch.cpp

index c1bf2af..d9936ea 100644 (file)
--- a/aten/src/ATen/core/LegacyTypeDispatch.cpp
+++ b/aten/src/ATen/core/LegacyTypeDispatch.cpp
@@ -26,7 +26,7 @@ namespace at {
  /// In the CAFFE2_FB_LIMITED_MOBILE_CAPABILITY build setting,
  /// thread_local is not supported. In that case, we don't provide
  /// `at::NonVariableTypeMode`.
-#if !C10_MOBILE && !defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
+#if !defined(C10_MOBILE) && !defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
  
  thread_local bool NonVariableTypeMode_enabled = false;
  
@@ -38,7 +38,7 @@ void NonVariableTypeMode::set_enabled(bool enabled) {
    NonVariableTypeMode_enabled = enabled;
  }
  
-#else // C10_MOBILE || defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
+#else // defined(C10_MOBILE) || defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
  
  bool NonVariableTypeMode::is_enabled() {
    throw std::runtime_error("NonVariableTypeMode is not supported on mobile");
diff --git a/aten/src/ATen/core/aten_interned_strings.h b/aten/src/ATen/core/aten_interned_strings.h

index 9e3a40f..dae1933 100644 (file)
--- a/aten/src/ATen/core/aten_interned_strings.h
+++ b/aten/src/ATen/core/aten_interned_strings.h
@@ -8,7 +8,7 @@
  // To explicitly use interned strings as symbols in your code, you must add
  // them to this list.
  
-#if !C10_MOBILE
+#ifndef C10_MOBILE
  #define FORALL_ATEN_BASE_SYMBOLS(_) \
  _(aten, __and__) \
  _(aten, __iand__) \
diff --git a/aten/src/ATen/core/interned_strings.h b/aten/src/ATen/core/interned_strings.h

index ed4b768..ce88112 100644 (file)
--- a/aten/src/ATen/core/interned_strings.h
+++ b/aten/src/ATen/core/interned_strings.h
@@ -10,7 +10,7 @@
  
  namespace c10 {
  
-#if !C10_MOBILE
+#ifndef C10_MOBILE
  #define FORALL_NS_SYMBOLS(_)       \
    _(namespaces, prim)              \
    _(namespaces, aten)              \
diff --git a/aten/src/ATen/native/Convolution.cpp b/aten/src/ATen/native/Convolution.cpp

index 080f53a..cbfc8bb 100644 (file)
--- a/aten/src/ATen/native/Convolution.cpp
+++ b/aten/src/ATen/native/Convolution.cpp
@@ -156,7 +156,7 @@ auto ConvParams::use_nnpack(const at::Tensor& input) const -> bool {
           !is_dilated() && // or dilation
           !transposed &&   // or transposed tensors
           input.ndimension() == 4 // must be in NCHW format
-#if !C10_MOBILE && !defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
+#if !defined(C10_MOBILE) && !defined(CAFFE2_FB_LIMITED_MOBILE_CAPABILITY)
           && input.size(0) >= 16 // ensure large enough batch size to ensure perf, tuneable
  #endif
       ;
diff --git a/c10/macros/Macros.h b/c10/macros/Macros.h

index 8834f83..b12d8b7 100644 (file)
--- a/c10/macros/Macros.h
+++ b/c10/macros/Macros.h
@@ -110,8 +110,8 @@ namespace at { namespace cuda { using namespace c10::hip; }}
  #define C10_DEVICE __device__
  #define C10_HOST __host__
  // constants from (https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications)
-// The maximum number of threads per multiprocessor is 1024 for Turing architecture (7.5) 
-// but 2048 for previous architectures. You'll get warnings if you exceed these constants. 
+// The maximum number of threads per multiprocessor is 1024 for Turing architecture (7.5)
+// but 2048 for previous architectures. You'll get warnings if you exceed these constants.
  // Hence, the following macros adjust the input values from the user to resolve potential warnings.
  #if __CUDA_ARCH__ >= 750
  constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1024;
@@ -121,17 +121,17 @@ constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 2048;
  // CUDA_MAX_THREADS_PER_BLOCK is same for all architectures currently
  constexpr uint32_t CUDA_MAX_THREADS_PER_BLOCK = 1024;
  // CUDA_THREADS_PER_BLOCK_FALLBACK is the "canonical fallback" choice of block size.
-// 256 is a good number for this fallback and should give good occupancy and 
+// 256 is a good number for this fallback and should give good occupancy and
  // versatility across all architectures.
  constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
  // NOTE: if you are thinking of constexpr-ify the inputs to launch bounds, it
-//       turns out that although __launch_bounds__ can take constexpr, it 
-//       can't take a constexpr that has anything to do with templates. 
-//       Currently we use launch_bounds that depend on template arguments in 
-//       Loops.cuh, Reduce.cuh and LossCTC.cuh. Hence, C10_MAX_THREADS_PER_BLOCK and 
+//       turns out that although __launch_bounds__ can take constexpr, it
+//       can't take a constexpr that has anything to do with templates.
+//       Currently we use launch_bounds that depend on template arguments in
+//       Loops.cuh, Reduce.cuh and LossCTC.cuh. Hence, C10_MAX_THREADS_PER_BLOCK and
  //       C10_MIN_BLOCKS_PER_SM are kept as macros.
-// Suppose you were planning to write __launch_bounds__(a, b), based on your performance tuning on a modern GPU. 
-// Instead, you should write __launch_bounds__(C10_MAX_THREADS_PER_BLOCK(a), C10_MIN_BLOCKS_PER_SM(a, b)), 
+// Suppose you were planning to write __launch_bounds__(a, b), based on your performance tuning on a modern GPU.
+// Instead, you should write __launch_bounds__(C10_MAX_THREADS_PER_BLOCK(a), C10_MIN_BLOCKS_PER_SM(a, b)),
  // which will also properly respect limits on old architectures.
  #define C10_MAX_THREADS_PER_BLOCK(val) (((val) <= CUDA_MAX_THREADS_PER_BLOCK) ? (val) : CUDA_THREADS_PER_BLOCK_FALLBACK)
  #define C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm) ((((threads_per_block)*(blocks_per_sm) <= CUDA_MAX_THREADS_PER_SM) ? (blocks_per_sm) : ((CUDA_MAX_THREADS_PER_SM + (threads_per_block) - 1) / (threads_per_block))))
@@ -164,9 +164,6 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
  #define C10_MOBILE 1
  #elif (defined(__APPLE__) && TARGET_OS_MAC)
  #define C10_IOS 1
-#define C10_MOBILE 0
-#else
-#define C10_MOBILE 0
  #endif // ANDROID / IOS / MACOS
  
  // Portably determine if a type T is trivially copyable or not.
diff --git a/c10/util/numa.cpp b/c10/util/numa.cpp

index 530316a..2f4ceda 100644 (file)
--- a/c10/util/numa.cpp
+++ b/c10/util/numa.cpp
@@ -2,7 +2,7 @@
  
  C10_DEFINE_bool(caffe2_cpu_numa_enabled, false, "Use NUMA whenever possible.");
  
-#if defined(__linux__) && !defined(C10_DISABLE_NUMA) && C10_MOBILE == 0
+#if defined(__linux__) && !defined(C10_DISABLE_NUMA) && !defined(C10_MOBILE)
  #include <numa.h>
  #include <numaif.h>
  #include <unistd.h>
diff --git a/caffe2/core/c10_operator.h b/caffe2/core/c10_operator.h

index 92aef48..e05d5d7 100644 (file)
--- a/caffe2/core/c10_operator.h
+++ b/caffe2/core/c10_operator.h
@@ -133,7 +133,7 @@ inline c10::FunctionSchema make_function_schema_for_c10(const char* OperatorName
   * - calling C10_REGISTER_CAFFE2_OPERATOR_CUDA is optional and can be omitted if
   * you don't want to expose the operator for CUDA operations.
   */
-#if !C10_MOBILE
+#ifndef C10_MOBILE
  #define C10_DECLARE_CAFFE2_OPERATOR(OperatorName) \
    namespace caffe2 {                              \
    namespace _c10_ops {                            \
diff --git a/caffe2/core/operator_c10wrapper.h b/caffe2/core/operator_c10wrapper.h

index 192a73d..b8511f5 100644 (file)
--- a/caffe2/core/operator_c10wrapper.h
+++ b/caffe2/core/operator_c10wrapper.h
@@ -180,7 +180,7 @@ C10_DECLARE_REGISTRY(
      Workspace*);
  
  // TODO Also register c10 operators on mobile
-#if !C10_MOBILE
+#ifndef C10_MOBILE
  // TODO Currently we only register the CPU variant. This is going to be fixed
  //      once the tensor detemplatization lands.
  #define REGISTER_C10_OPERATOR_FOR_CAFFE2_DISPATCH(OperatorHandle, Name, NumOutputParameters)  \
diff --git a/caffe2/mobile/contrib/ios/ios_caffe_predictor.cc b/caffe2/mobile/contrib/ios/ios_caffe_predictor.cc

index 2554771..c2da661 100644 (file)
--- a/caffe2/mobile/contrib/ios/ios_caffe_predictor.cc
+++ b/caffe2/mobile/contrib/ios/ios_caffe_predictor.cc
@@ -2,7 +2,7 @@
  #include "caffe2/core/flags.h"
  #include "caffe2/core/tensor.h"
  
-#if defined(CAFFE2_USE_MPSCNN) && C10_MOBILE
+#if defined(CAFFE2_USE_MPSCNN) && defined(C10_MOBILE)
  #include "caffe2/mobile/contrib/ios/mpscnn/mpscnn.h"
  #endif
  
@@ -14,7 +14,7 @@ Caffe2IOSPredictor* Caffe2IOSPredictor::NewCaffe2IOSPredictor(const caffe2::NetD
                                                                bool allowMetalOperators) {
    caffe2::NetDef metal_predict_net;
    bool usingMetalOperators = false;
-#if defined(CAFFE2_USE_MPSCNN) && C10_MOBILE
+#if defined(CAFFE2_USE_MPSCNN) && defined(C10_MOBILE)
    if (allowMetalOperators) {
      caffe2::dumpDef(predict_net);
      if (caffe2::tryConvertToMPSCNN(init_net, predict_net, &metal_predict_net)) {
@@ -38,7 +38,7 @@ Caffe2IOSPredictor::Caffe2IOSPredictor(const caffe2::NetDef& init_net,
                                         bool disableMultithreadProcessing,
                                         bool usingMetalOperators)
      : usingMetalOperators(usingMetalOperators), predictor_(init_net, predict_net) {
-#if C10_MOBILE
+#ifdef C10_MOBILE
    if (disableMultithreadProcessing) {
      caffe2::ThreadPool* threadpool = predictor_.ws()->GetThreadPool();
      if (threadpool != nullptr) {
diff --git a/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm b/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm

index 4cd912e..7494fe1 100644 (file)
--- a/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm
+++ b/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm
@@ -1,7 +1,7 @@
  #include "caffe2/core/common.h"
  #include "caffe2/core/context.h"
  
-#if defined(CAFFE2_USE_MPSCNN) && C10_MOBILE
+#if defined(CAFFE2_USE_MPSCNN) && defined(C10_MOBILE)
  
  #include "caffe2/core/operator.h"
  #include "caffe2/core/timer.h"
diff --git a/caffe2/mobile/contrib/ios/mpscnn/mpscnn_context.mm b/caffe2/mobile/contrib/ios/mpscnn/mpscnn_context.mm

index 3553213..a82fbe0 100644 (file)
--- a/caffe2/mobile/contrib/ios/mpscnn/mpscnn_context.mm
+++ b/caffe2/mobile/contrib/ios/mpscnn/mpscnn_context.mm
@@ -1,7 +1,7 @@
  
  #include "caffe2/core/common.h"
  
-#if C10_MOBILE
+#ifdef C10_MOBILE
  
  #include "mpscnn_context.h"
  #include "mpscnn_kernels.h"
diff --git a/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm b/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm

index 7b195cf..0fa08fd 100644 (file)
--- a/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm
+++ b/caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm
@@ -1,6 +1,6 @@
  #include "caffe2/core/common.h"
  
-#if C10_MOBILE && defined(CAFFE2_USE_MPSCNN_TEST)
+#if defined(C10_MOBILE) && defined(CAFFE2_USE_MPSCNN_TEST)
  
  #include "mpscnn_context.h"
  #include "mpscnn_graph_mask.h"
diff --git a/caffe2/mobile/contrib/ulp2/ulp.cc b/caffe2/mobile/contrib/ulp2/ulp.cc

index 6f40797..def78d5 100644 (file)
--- a/caffe2/mobile/contrib/ulp2/ulp.cc
+++ b/caffe2/mobile/contrib/ulp2/ulp.cc
@@ -280,7 +280,7 @@ std::unique_ptr<QConvState> create2b1bConvState(Workspace* ws,
    //   r->WQL1Norm.mutable_data<float>()[i] *= center_distance;
    // }
    state->parallelFor = [ws](size_t range, std::function<void(size_t)> f) {
-#if C10_MOBILE
+#ifdef C10_MOBILE
      ws->GetThreadPool()->run([&](int, size_t v) { f(v); }, range);
  #else
      for (size_t v = 0; v < range; ++v) {
diff --git a/caffe2/onnx/backend.cc b/caffe2/onnx/backend.cc

index fae237a..e7c512a 100644 (file)
--- a/caffe2/onnx/backend.cc
+++ b/caffe2/onnx/backend.cc
@@ -6,7 +6,7 @@
  #include "caffe2/utils/map_utils.h"
  #include "caffe2/utils/proto_utils.h"
  
-#if !C10_MOBILE
+#ifndef C10_MOBILE
  #include "onnx/checker.h"
  #include "onnx/optimizer/optimize.h"
  #endif
@@ -69,7 +69,7 @@ caffe2::DeviceOption GetDeviceOption(const Device& onnx_device) {
    return d;
  }
  
-#if !C10_MOBILE
+#ifndef C10_MOBILE
  ModelProto OptimizeOnnx(const ModelProto& input, bool init) {
    std::vector<std::string> passes{"fuse_consecutive_transposes",
                                    "eliminate_nop_transpose",
@@ -1422,7 +1422,7 @@ void Caffe2Backend::OnnxToCaffe2(
      const std::vector<Caffe2Ops>& extras) {
    auto device_option = GetDeviceOption(Device(device));
  
-#if !C10_MOBILE
+#ifndef C10_MOBILE
    ModelProto init_model = OptimizeOnnx(onnx_model, true);
    ModelProto pred_model = OptimizeOnnx(onnx_model, false);
  #else
@@ -1526,7 +1526,7 @@ Caffe2BackendRep* Caffe2Backend::Prepare(
    ModelProto onnx_model;
    ParseProtoFromLargeString(onnx_model_str, &onnx_model);
  
-#if !C10_MOBILE
+#ifndef C10_MOBILE
    ::ONNX_NAMESPACE::checker::check_model(onnx_model);
  #endif
  
diff --git a/caffe2/operators/conv_transpose_op_mobile.cc b/caffe2/operators/conv_transpose_op_mobile.cc

index eec8692..dcac607 100644 (file)
--- a/caffe2/operators/conv_transpose_op_mobile.cc
+++ b/caffe2/operators/conv_transpose_op_mobile.cc
@@ -3,11 +3,7 @@
  
  namespace caffe2 {
  
-#ifndef C10_MOBILE
-#error "mobile build state not defined"
-#endif
-
-#if C10_MOBILE
+#ifdef C10_MOBILE
  // mobile-only implementation (tiled + vectorized + multithreaded)
  REGISTER_CPU_OPERATOR_WITH_ENGINE(
      ConvTranspose,
diff --git a/caffe2/operators/conv_transpose_op_mobile.h b/caffe2/operators/conv_transpose_op_mobile.h

index ba4a767..9f26d8b 100644 (file)
--- a/caffe2/operators/conv_transpose_op_mobile.h
+++ b/caffe2/operators/conv_transpose_op_mobile.h
@@ -3,11 +3,7 @@
  
  #include "caffe2/core/common.h"
  
-#ifndef C10_MOBILE
-#error "mobile build state not defined"
-#endif
-
-#if C10_MOBILE
+#ifdef C10_MOBILE
  
  #include "caffe2/core/context.h"
  #include "caffe2/core/operator.h"
diff --git a/caffe2/operators/conv_transpose_op_mobile_impl.h b/caffe2/operators/conv_transpose_op_mobile_impl.h

index 6869f51..8b60acc 100644 (file)
--- a/caffe2/operators/conv_transpose_op_mobile_impl.h
+++ b/caffe2/operators/conv_transpose_op_mobile_impl.h
@@ -5,11 +5,7 @@
  
  #include "caffe2/core/common.h"
  
-#ifndef C10_MOBILE
-#error "mobile build state not defined"
-#endif
-
-#if C10_MOBILE
+#ifdef C10_MOBILE
  
  #include "caffe2/core/logging.h"
  #include "caffe2/operators/conv_op_shared.h"
diff --git a/caffe2/predictor/predictor_config.cc b/caffe2/predictor/predictor_config.cc

index ab735a9..33fd02f 100644 (file)
--- a/caffe2/predictor/predictor_config.cc
+++ b/caffe2/predictor/predictor_config.cc
@@ -71,7 +71,7 @@ PredictorConfig makePredictorConfig(
    if (run_init) {
      CAFFE_ENFORCE(ws.RunNetOnce(init_net));
    }
-#if C10_MOBILE
+#ifdef C10_MOBILE
    GlobalInit();
  #endif
    if (optimization &&
diff --git a/caffe2/share/contrib/depthwise/depthwise3x3_conv_op.cc b/caffe2/share/contrib/depthwise/depthwise3x3_conv_op.cc

index 7b1c662..37460c8 100644 (file)
--- a/caffe2/share/contrib/depthwise/depthwise3x3_conv_op.cc
+++ b/caffe2/share/contrib/depthwise/depthwise3x3_conv_op.cc
@@ -489,7 +489,7 @@ class Depthwise3x3ConvOp final : public ConvPoolOpBase<CPUContext> {
  
      Timer t;
  
-#if C10_MOBILE
+#ifdef C10_MOBILE
      ws_->GetThreadPool()->run(
          [&](int, int n_g) {
            const int g = n_g / N;
diff --git a/modules/observers/perf_observer.cc b/modules/observers/perf_observer.cc

index ea5d22c..2de1ce6 100644 (file)
--- a/modules/observers/perf_observer.cc
+++ b/modules/observers/perf_observer.cc
@@ -1,6 +1,6 @@
  #include "observers/perf_observer.h"
  #include "observers/observer_config.h"
-#if !C10_MOBILE
+#ifndef C10_MOBILE
  #include "caffe2/core/flags.h"
  #include "observers/net_observer_reporter_print.h"
  #endif
@@ -10,7 +10,7 @@
  #include "caffe2/core/init.h"
  #include "caffe2/core/operator.h"
  
-#if !C10_MOBILE
+#ifndef C10_MOBILE
  C10_DEFINE_int64(
      aiBench_netInitSampleRate,
      0,
@@ -45,7 +45,7 @@ bool registerGlobalPerfNetObserverCreator(int* /*pargc*/, char*** /*pargv*/) {
      return caffe2::make_unique<PerfNetObserver>(subject);
    });
  
-#if !C10_MOBILE
+#if !defined(C10_MOBILE)
    // for aibench usage
    caffe2::ObserverConfig::setReporter(
        caffe2::make_unique<caffe2::NetObserverReporterPrint>());
author	Sebastian Messmer <messmer@fb.com>
	Thu, 28 Feb 2019 01:54:51 +0000 (17:54 -0800)
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
	Thu, 28 Feb 2019 01:57:51 +0000 (17:57 -0800)
aten/src/ATen/core/LegacyTypeDispatch.cpp		patch \| blob \| history
aten/src/ATen/core/aten_interned_strings.h		patch \| blob \| history
aten/src/ATen/core/interned_strings.h		patch \| blob \| history
aten/src/ATen/native/Convolution.cpp		patch \| blob \| history
c10/macros/Macros.h		patch \| blob \| history
c10/util/numa.cpp		patch \| blob \| history
caffe2/core/c10_operator.h		patch \| blob \| history
caffe2/core/operator_c10wrapper.h		patch \| blob \| history
caffe2/mobile/contrib/ios/ios_caffe_predictor.cc		patch \| blob \| history
caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm		patch \| blob \| history
caffe2/mobile/contrib/ios/mpscnn/mpscnn_context.mm		patch \| blob \| history
caffe2/mobile/contrib/ios/mpscnn/mpscnn_test.mm		patch \| blob \| history
caffe2/mobile/contrib/ulp2/ulp.cc		patch \| blob \| history
caffe2/onnx/backend.cc		patch \| blob \| history
caffe2/operators/conv_transpose_op_mobile.cc		patch \| blob \| history
caffe2/operators/conv_transpose_op_mobile.h		patch \| blob \| history
caffe2/operators/conv_transpose_op_mobile_impl.h		patch \| blob \| history
caffe2/predictor/predictor_config.cc		patch \| blob \| history
caffe2/share/contrib/depthwise/depthwise3x3_conv_op.cc		patch \| blob \| history
modules/observers/perf_observer.cc		patch \| blob \| history