}
};
+/**
+ * Reinitialize a Tensor to the given dims and options, if necessary.
+ * Note that this is a no-op when the Tensor already has the correct
+ * size and data type.
+ */
CAFFE2_API void ReinitializeTensor(Tensor* t, at::IntList dims, at::TensorOptions options);
CAFFE2_API void ReinitializeAndCopyFrom(
    Tensor* t,
    at::TensorOptions options,
    const Tensor& src);
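+// Illustrative usage (a minimal sketch; `buf_` and `N` are hypothetical):
+// the tensor is allocated on first use, and later calls with the same shape
+// and dtype are no-ops:
+//
+//   Tensor buf_;  // member; no device pinned at construction
+//   ReinitializeTensor(&buf_, {N}, at::dtype<float>().device(CPU));
+//   float* data = buf_.mutable_data<float>();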
bool RunOnDevice() override {
auto* output = Outputs()[0]->template GetMutable<Int8TensorCPU>();
- output->t.Resize(shape_);
+ ReinitializeTensor(&output->t, shape_, at::dtype<uint8_t>().device(CPU));
output->scale = scale_;
output->zero_point = zero_point_;
return Fill(output);
private:
void ExtractValues() {
auto source_values = this->template GetSingleArgument<string>("values", "");
- values_.Resize(source_values.size());
+ ReinitializeTensor(
+ &values_, {static_cast<int64_t>(source_values.size())}, at::dtype<uint8_t>().device(CPU));
uint8_t* values_data = values_.template mutable_data<uint8_t>();
for (int i = 0; i < source_values.size(); i++) {
values_data[i] = static_cast<uint8_t>(source_values[i]);
float scale_;
int32_t zero_point_;
vector<int64_t> shape_;
- Tensor values_{CPU};
+ Tensor values_;
};
class Int8GivenIntTensorFillOp final : public Operator<CPUContext> {
private:
void ExtractValues() {
auto source_values = this->template GetRepeatedArgument<int32_t>("values");
- values_.Resize(source_values.size());
+ ReinitializeTensor(
+ &values_, {static_cast<int64_t>(source_values.size())}, at::dtype<int32_t>().device(CPU));
auto* values_data = values_.template mutable_data<int32_t>();
for (int i = 0; i < source_values.size(); i++) {
values_data[i] = static_cast<int32_t>(source_values[i]);
float scale_;
int32_t zero_point_;
vector<int64_t> shape_;
- Tensor values_{CPU};
+ Tensor values_;
};
} // namespace int8
const int OW = IW * width_scale_;
const int OH = IH * height_scale_;
- Y->t.Resize(N, OH, OW, C);
+ ReinitializeTensor(&Y->t, {N, OH, OW, C}, at::dtype<uint8_t>().device(CPU));
Y->scale = X.scale;
Y->zero_point = X.zero_point;
assert(sampling_ratio_ >= 0);
// only supports NHWC now
- Y->t.Resize(R.dim32(0), pooled_height_, pooled_width_, X.t.dim32(3));
+ ReinitializeTensor(
+ &Y->t,
+ {R.dim32(0), pooled_height_, pooled_width_, X.t.dim32(3)},
+ at::dtype<uint8_t>().device(CPU));
int output_size = Y->t.numel();
ROIAlignForward(
template <typename SIndex>
bool DoRunWithType() {
if (InputSize() > 1) {
- starts_host_.CopyFrom(Input(1));
- ends_host_.CopyFrom(Input(2));
+ ReinitializeAndCopyFrom(&starts_host_, at::dtype<SIndex>().device(CPU), Input(1));
+ ReinitializeAndCopyFrom(&ends_host_, at::dtype<SIndex>().device(CPU), Input(2));
} else {
if (!statically_inited_) {
CAFFE_ENFORCE(HasArgument("starts"));
CAFFE_ENFORCE(HasArgument("ends"));
CAFFE_ENFORCE_EQ(starts_.size(), ends_.size());
- starts_host_.Resize(starts_.size());
- ends_host_.Resize(ends_.size());
+ ReinitializeTensor(
+ &starts_host_, {static_cast<int64_t>(starts_.size())}, at::dtype<SIndex>().device(CPU));
+ ReinitializeTensor(
+ &ends_host_, {static_cast<int64_t>(ends_.size())}, at::dtype<SIndex>().device(CPU));
memcpy(
starts_host_.template mutable_data<SIndex>(),
auto r = caffe2::make_unique<int8::Int8TensorCPU>();
r->scale = 0.01;
r->zero_point = static_cast<int32_t>(std::numeric_limits<uint8_t>::max()) / 2;
- r->t.Resize(dims);
+ ReinitializeTensor(&r->t, dims, at::dtype<uint8_t>().device(CPU));
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<uint8_t> dis;
// Compute number of regions
int min_step = 1;
int max_step = 6;
- num_rois_.Resize(3); // num_rois, Wd, Hd
+ ReinitializeTensor(&num_rois_, {3}, at::dtype<int>().device(CUDA)); // num_rois, Wd, Hd
NumRMACRegionsKernel<<<
1,
CAFFE_CUDA_NUM_THREADS,
protected:
int scales_;
float overlap_;
- Tensor num_rois_{Context::GetDeviceType()};
+ Tensor num_rois_;
};
} // namespace caffe2
CAFFE_ENFORCE_EQ(1, segment_ids.ndim(), "SEGMENT_IDS must be a vector");
int64_t slize_sz = data.size_from_dim(1);
- K_tensor_.Resize(1);
+ ReinitializeTensor(&K_tensor_, {1}, at::dtype<SIndex>().device(CUDA));
// Get maximum segment id so we can size the output.
// This must be done synchronously with the host.
if (segment_ids.size() > 4096) {
context_.cuda_stream());
// The second call does the real computation.
- buffer_tensor_.Resize(tmp_storage_bytes);
+ ReinitializeTensor(
+     &buffer_tensor_,
+     {static_cast<int64_t>(tmp_storage_bytes)},
+     at::dtype<char>().device(CUDA));
cub::DeviceReduce::Max(
static_cast<void*>(buffer_tensor_.mutable_data<char>()),
tmp_storage_bytes,
nullptr);
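+ // The cub size-query/compute idiom used above, sketched (argument names
+ // are illustrative):
+ //   size_t tmp_bytes = 0;
+ //   cub::DeviceReduce::Max(nullptr, tmp_bytes, d_in, d_out, n);  // query
+ //   ReinitializeTensor(&buffer_tensor_, {static_cast<int64_t>(tmp_bytes)},
+ //                      at::dtype<char>().device(CUDA));
+ //   cub::DeviceReduce::Max(buffer_tensor_.mutable_data<char>(), tmp_bytes,
+ //                          d_in, d_out, n);  // real reduction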
} else {
// For mean, we need to compute scaling factors
- scaling_factors_.Resize(K + 1);
+ ReinitializeTensor(&scaling_factors_, {K + 1}, at::dtype<int>().device(CUDA));
math::Set<int, CUDAContext>(
scaling_factors_.size(),
int(0),
}
private:
- Tensor buffer_tensor_{CUDA};
- Tensor K_tensor_{CUDA};
- Tensor scaling_factors_{CUDA}; // for mean
+ Tensor buffer_tensor_;
+ Tensor K_tensor_;
+ Tensor scaling_factors_; // for mean
};
template <typename SIndex>
K += 1;
if (segment_len_.size() != K) {
- segment_len_.Resize(K);
+ ReinitializeTensor(&segment_len_, {K}, at::dtype<SIndex>().device(CUDA));
}
math::Set<SIndex, CUDAContext>(
}
private:
- Tensor segment_len_{CUDA}; // for mean
+ Tensor segment_len_; // for mean
};
REGISTER_CUDA_OPERATOR_STR(
auto& data = Input(0);
if (InputSize() > 1) {
- starts_host_.CopyFrom(Input(1));
- ends_host_.CopyFrom(Input(2));
+ ReinitializeAndCopyFrom(&starts_host_, at::dtype<SIndex>().device(CPU), Input(1));
+ ReinitializeAndCopyFrom(&ends_host_, at::dtype<SIndex>().device(CPU), Input(2));
} else {
if (!statically_inited_) {
CAFFE_ENFORCE(HasArgument("starts"));
CAFFE_ENFORCE(HasArgument("ends"));
CAFFE_ENFORCE_EQ(starts_.size(), ends_.size());
- starts_host_.Resize(starts_.size());
- ends_host_.Resize(ends_.size());
+ ReinitializeTensor(
+     &starts_host_, {static_cast<int64_t>(starts_.size())}, at::dtype<SIndex>().device(CPU));
+ ReinitializeTensor(
+     &ends_host_, {static_cast<int64_t>(ends_.size())}, at::dtype<SIndex>().device(CPU));
memcpy(
starts_host_.mutable_data<SIndex>(),
std::vector<int64_t> starts_;
std::vector<int64_t> ends_;
bool statically_inited_;
- Tensor starts_host_{CPU};
- Tensor ends_host_{CPU};
+ Tensor starts_host_;
+ Tensor ends_host_;
}; // class SliceOp<CUDAContext>
auto& data = Input(0);
if (InputSize() == 4) {
- starts_host_.CopyFrom(Input(1));
- ends_host_.CopyFrom(Input(2));
+ ReinitializeAndCopyFrom(&starts_host_, at::dtype<SIndex>().device(CPU), Input(1));
+ ReinitializeAndCopyFrom(&ends_host_, at::dtype<SIndex>().device(CPU), Input(2));
auto& go = Input(3);
CAFFE_ENFORCE(HasArgument("ends"));
CAFFE_ENFORCE_EQ(starts_.size(), ends_.size());
- starts_host_.Resize(starts_.size());
- ends_host_.Resize(ends_.size());
+ ReinitializeTensor(
+     &starts_host_, {static_cast<int64_t>(starts_.size())}, at::dtype<SIndex>().device(CPU));
+ ReinitializeTensor(
+     &ends_host_, {static_cast<int64_t>(ends_.size())}, at::dtype<SIndex>().device(CPU));
memcpy(
starts_host_.mutable_data<SIndex>(),
std::vector<int64_t> starts_;
std::vector<int64_t> ends_;
bool statically_inited_;
- Tensor starts_host_{CPU};
- Tensor ends_host_{CPU};
+ Tensor starts_host_;
+ Tensor ends_host_;
}; // class SliceGradientOp<CUDAContext>
REGISTER_CUDA_OPERATOR(SliceGradient, SliceGradientOp<CUDAContext>);
} // namespace caffe2
template <typename SIndex>
bool DoRunWithType() {
if (InputSize() > 1) {
- starts_host_.CopyFrom(Input(1));
- ends_host_.CopyFrom(Input(2));
+ ReinitializeAndCopyFrom(&starts_host_, at::dtype<SIndex>().device(CPU), Input(1));
+ ReinitializeAndCopyFrom(&ends_host_, at::dtype<SIndex>().device(CPU), Input(2));
} else {
if (!statically_inited_) {
CAFFE_ENFORCE(HasArgument("starts"));
CAFFE_ENFORCE(HasArgument("ends"));
CAFFE_ENFORCE_EQ(starts_.size(), ends_.size());
- starts_host_.Resize(starts_.size());
- ends_host_.Resize(ends_.size());
+ ReinitializeTensor(
+     &starts_host_, {static_cast<int64_t>(starts_.size())}, at::dtype<SIndex>().device(CPU));
+ ReinitializeTensor(
+     &ends_host_, {static_cast<int64_t>(ends_.size())}, at::dtype<SIndex>().device(CPU));
memcpy(
starts_host_.template mutable_data<SIndex>(),
std::vector<int64_t> starts_;
std::vector<int64_t> ends_;
bool statically_inited_;
- Tensor starts_host_{CPU};
- Tensor ends_host_{CPU};
+ Tensor starts_host_;
+ Tensor ends_host_;
};
template <class Context>
auto& data = Input(0);
if (InputSize() == 4) {
- starts_host_.CopyFrom(Input(1));
- ends_host_.CopyFrom(Input(2));
+ ReinitializeAndCopyFrom(&starts_host_, at::dtype<SIndex>().device(CPU), Input(1));
+ ReinitializeAndCopyFrom(&ends_host_, at::dtype<SIndex>().device(CPU), Input(2));
auto& go = Input(3);
CAFFE_ENFORCE(HasArgument("ends"));
CAFFE_ENFORCE_EQ(starts_.size(), ends_.size());
- starts_host_.Resize(starts_.size());
- ends_host_.Resize(ends_.size());
+ ReinitializeTensor(
+ &starts_host_, {static_cast<int64_t>(starts_.size())}, at::dtype<SIndex>().device(CPU));
+ ReinitializeTensor(
+ &ends_host_, {static_cast<int64_t>(ends_.size())}, at::dtype<SIndex>().device(CPU));
memcpy(
starts_host_.template mutable_data<SIndex>(),
std::vector<int64_t> starts_;
std::vector<int64_t> ends_;
bool statically_inited_;
- Tensor starts_host_{CPU};
- Tensor ends_host_{CPU};
+ Tensor starts_host_;
+ Tensor ends_host_;
};
} // namespace caffe2
const int D = X.size_from_dim(canonical_axis);
auto* Y = Output(0, X.sizes(), at::dtype<float>());
float* Ydata = Y->template mutable_data<float>();
+ // ReinitializeTensor is itself a no-op when the size and data type already
+ // match, so there is no need to check the Tensor's numel() first.
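+ // A minimal sketch of that check (assumed from the doc comment on
+ // ReinitializeTensor, not its verbatim implementation):
+ //   if (t->sizes() == dims && t->dtype() == options.dtype()) return;
+ //   /* otherwise resize and set the data type */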
// First, get scales
- if (scale_.numel() != N) {
- scale_.Resize(N);
- }
- if (rowmax_.numel() != N) {
- rowmax_.Resize(N);
- }
- if (sum_multiplier_.numel() != D) {
- sum_multiplier_.Resize(D);
- math::Set<float, CPUContext>(D, 1.f, sum_multiplier_.mutable_data<float>(),
- &context_);
- }
+ ReinitializeTensor(&scale_, {N}, at::dtype<float>().device(CPU));
+ ReinitializeTensor(&rowmax_, {N}, at::dtype<float>().device(CPU));
+ ReinitializeTensor(&sum_multiplier_, {D}, at::dtype<float>().device(CPU));
+ math::Set<float, CPUContext>(
+     D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
SoftmaxCPU(
context_,
auto& dY = Input(1);
const auto canonical_axis = Y.canonical_axis_index(axis_);
- const int N = Y.size_to_dim(canonical_axis);
- const int D = Y.size_from_dim(canonical_axis);
+ const int64_t N = Y.size_to_dim(canonical_axis);
+ const int64_t D = Y.size_from_dim(canonical_axis);
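+ // (int64_t guards against overflow here: size_to_dim/size_from_dim are
+ // products of dimensions and can exceed INT_MAX on large tensors.)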
// First, get scales
if (scale_.numel() != N) {
- scale_.Resize(N);
+ ReinitializeTensor(
+ &scale_, {N}, at::dtype<float>().device(CPU));
}
if (sum_multiplier_.numel() != D) {
- sum_multiplier_.Resize(D);
+ ReinitializeTensor(
+ &sum_multiplier_,
+ {D},
+ at::dtype<float>().device(CPU));
math::Set<float, CPUContext>(D, 1.f, sum_multiplier_.mutable_data<float>(),
&context_);
}
protected:
int axis_;
- Tensor scale_{Context::GetDeviceType()};
- Tensor rowmax_{Context::GetDeviceType()};
- Tensor sum_multiplier_{Context::GetDeviceType()};
+ Tensor scale_;
+ Tensor rowmax_;
+ Tensor sum_multiplier_;
};
template <typename T, class Context>
protected:
int axis_;
- Tensor scale_{Context::GetDeviceType()};
- Tensor sum_multiplier_{Context::GetDeviceType()};
+ Tensor scale_;
+ Tensor sum_multiplier_;
};
} // namespace caffe2
N = X.size_to_dim(canonical_axis); // batch size
D = X.size_from_dim(canonical_axis);
P->ResizeLike(X);
- total_weight_ptr_.Resize(1);
+ ReinitializeTensor(&total_weight_ptr_, {1}, at::dtype<float>().device(CUDA));
if (label_prob_mode_) {
CAFFE_ENFORCE_GE(T.ndim(), 2);
auto* avg_loss =
Output(1, vector<int64_t>(), at::dtype<float>()); // Average loss
if (losses_.size() != N) {
- losses_.Resize(N);
+ ReinitializeTensor(&losses_, {N}, at::dtype<float>().device(CUDA));
}
if (rowmax_.size() != N) {
- rowmax_.Resize(N);
+ ReinitializeTensor(&rowmax_, {N}, at::dtype<float>().device(CUDA));
}
if (sum_multiplier_.size() != D) {
- sum_multiplier_.Resize(D);
+ ReinitializeTensor(&sum_multiplier_, {D}, at::dtype<float>().device(CUDA));
math::Set<float, CUDAContext>(
D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
}
N = X.dim32(0);
D = X.dim32(1);
P->ResizeLike(X);
- total_weight_ptr_.Resize(1);
+ ReinitializeTensor(&total_weight_ptr_, {1}, at::dtype<float>().device(CUDA));
CAFFE_ENFORCE_EQ(X.ndim(), 4);
CAFFE_ENFORCE_EQ(T.ndim(), 3);
CAFFE_ENFORCE_EQ(T.dim32(0), N);
int H = X.dim32(2);
int W = X.dim32(3);
if (losses_.size() != N * W * H) {
- losses_.Resize(N * W * H);
+ ReinitializeTensor(&losses_, {N * W * H}, at::dtype<float>().device(CUDA));
}
if (weights_.size() != N * W * H) {
- weights_.Resize(N * W * H);
+ ReinitializeTensor(&weights_, {N * W * H}, at::dtype<float>().device(CUDA));
}
const float* Xdata = X.data<float>();
N = X.size_to_dim(canonical_axis); // batch size
D = X.size_from_dim(canonical_axis);
- total_weight_ptr_.Resize(1);
+ ReinitializeTensor(&total_weight_ptr_, {1}, at::dtype<float>().device(CUDA));
if (label_prob_mode_) {
CAFFE_ENFORCE_GE(T.ndim(), 2);
N = X.dim32(0);
D = X.dim32(1);
- total_weight_ptr_.Resize(1);
+ ReinitializeTensor(&total_weight_ptr_, {1}, at::dtype<float>().device(CUDA));
// Spatial mode, compute softmax for each x, y location
CAFFE_ENFORCE_EQ(X.ndim(), 4);
CAFFE_ENFORCE_EQ(T.ndim(), 3);
int W = X.dim32(3);
dX->ResizeLike(X);
if (weights_.size() != N * W * H) {
- weights_.Resize(N * W * H);
+ ReinitializeTensor(&weights_, {N * W * H}, at::dtype<float>().device(CUDA));
}
const float* Pdata = P.data<float>();
return true;
}
if (sum_multiplier_.size() != D) {
- sum_multiplier_.Resize(D);
+ ReinitializeTensor(&sum_multiplier_, {D}, at::dtype<float>().device(CUDA));
math::Set<float, CUDAContext>(
D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
}
if (scale_.size() != N) {
- scale_.Resize(N);
+ ReinitializeTensor(&scale_, {N}, at::dtype<float>().device(CUDA));
}
if (rowmax_.size() != N) {
- rowmax_.Resize(N);
+ ReinitializeTensor(&rowmax_, {N}, at::dtype<float>().device(CUDA));
}
Softmax(
N,
auto& T = Input(1); // Labels / targets
const auto canonical_axis = X.canonical_axis_index(axis_);
- int N, D;
+ int64_t N, D;
N = X.size_to_dim(canonical_axis); // batch size
D = X.size_from_dim(canonical_axis);
auto* P =
}
if (sum_multiplier_.numel() != D) {
- sum_multiplier_.Resize(D);
+ ReinitializeTensor(
+ &sum_multiplier_,
+ {D},
+ at::dtype<float>().device(CPU));
math::Set<float, CPUContext>(
D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
}
- rowmax_.Resize(N);
- losses_.Resize(N);
+ ReinitializeTensor(
+ &rowmax_, {N}, at::dtype<float>().device(CPU));
+ ReinitializeTensor(
+ &losses_, {N}, at::dtype<float>().device(CPU));
SoftmaxCPU(
context_,
StorageOrder order_;
int axis_;
- Tensor losses_{Context::GetDeviceType()}; // Per example loss
- Tensor rowmax_{Context::GetDeviceType()}; // per example row max
+ Tensor losses_; // Per example loss
+ Tensor rowmax_; // per example row max
Tensor weights_{Context::GetDeviceType()}; // unignored weights
- Tensor sum_multiplier_{
- Context::GetDeviceType()}; // Vector of ones for summing via dot prod
- Tensor total_weight_ptr_{Context::GetDeviceType()};
+ Tensor sum_multiplier_; // Vector of ones for summing via dot prod
+ Tensor total_weight_ptr_;
Tensor scratch_{Context::GetDeviceType()};
};
int label_prob_mode_;
Tensor sum_multiplier_{Context::GetDeviceType()};
Tensor weights_{Context::GetDeviceType()}; // unignored weights
- Tensor total_weight_ptr_{Context::GetDeviceType()};
+ Tensor total_weight_ptr_;
StorageOrder order_;
bool only_loss_;
int axis_;
// Awkward way to get the max element to make it work with both CUDA
// and CPU.
- max_element_.Resize(1);
+ ReinitializeTensor(&max_element_, {1}, at::dtype<TInd>().device(Context::GetDeviceType()));
TInd* max_element_ptr = max_element_.template mutable_data<TInd>();
math::ReduceMax<TInd>(sparse_indices_len, sparse_indices_vec, max_element_ptr,
&scratch_, &context_);
int output_first_dim_;
Tensor scratch_{Context::GetDeviceType()};
Tensor max_element_host_{CPU};
- Tensor max_element_{Context::GetDeviceType()};
+ Tensor max_element_;
INPUT_TAGS(INDICES, VALUES, DATA_TO_INFER_DIM);
};
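// Sketch of the device-reduce-then-host-read pattern above (illustrative;
// assumes max_element_host_ is the CPU staging tensor declared above):
//   math::ReduceMax<TInd>(
//       n, indices, max_element_.template mutable_data<TInd>(),
//       &scratch_, &context_);
//   max_element_host_.CopyFrom(max_element_);  // device -> host copy
//   const TInd max_index = max_element_host_.template data<TInd>()[0];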
const T* scale_data = scale.template data<T>();
const T* bias_data = bias.template data<T>();
T* Y_data = Y->template mutable_data<T>();
- alpha_.Resize(C);
- beta_.Resize(C);
+ ReinitializeTensor(
+ &alpha_, {C}, at::dtype<T>().device(Context::GetDeviceType()));
+ ReinitializeTensor(
+ &beta_, {C}, at::dtype<T>().device(Context::GetDeviceType()));
T* alpha_data = alpha_.template mutable_data<T>();
T* beta_data = beta_.template mutable_data<T>();
if (is_test_) {
const StorageOrder order_;
const int num_batches_;
- Tensor alpha_{Context::GetDeviceType()};
- Tensor beta_{Context::GetDeviceType()};
+ Tensor alpha_;
+ Tensor beta_;
INPUT_TAGS(
INPUT,
math::Set<T, Context>(C, T(0), dbias_data, &context_);
return true;
}
- alpha_.Resize(C);
- beta_.Resize(C);
- gamma_.Resize(C);
+ ReinitializeTensor(
+ &alpha_, {C}, at::dtype<T>().device(Context::GetDeviceType()));
+ ReinitializeTensor(
+ &beta_, {C}, at::dtype<T>().device(Context::GetDeviceType()));
+ ReinitializeTensor(
+ &gamma_, {C}, at::dtype<T>().device(Context::GetDeviceType()));
T* alpha_data = alpha_.template mutable_data<T>();
T* beta_data = beta_.template mutable_data<T>();
T* gamma_data = gamma_.template mutable_data<T>();
const StorageOrder order_;
const int num_batches_;
- Tensor alpha_{Context::GetDeviceType()};
- Tensor beta_{Context::GetDeviceType()};
- Tensor gamma_{Context::GetDeviceType()};
+ Tensor alpha_;
+ Tensor beta_;
+ Tensor gamma_;
INPUT_TAGS(
INPUT,
Output(0, X.sizes(), at::dtype<float>()); // Probabilities from softmax
if (sum_multiplier_.numel() != D) {
- sum_multiplier_.Resize(D);
+ ReinitializeTensor(
+ &sum_multiplier_,
+ {D},
+ at::dtype<float>().device(CPU));
math::Set<float, CPUContext>(
D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
}
float scale_;
StorageOrder order_;
- Tensor losses_{Context::GetDeviceType()}; // Per example loss
+ Tensor losses_; // Per example loss
Tensor rowmax_{Context::GetDeviceType()}; // per example row max
- Tensor weights_{Context::GetDeviceType()}; // unignored weights
- Tensor sum_multiplier_{
- Context::GetDeviceType()}; // Vector of ones for summing via dot prod
- Tensor total_weight_ptr_{Context::GetDeviceType()};
+ Tensor weights_; // unignored weights
+ Tensor sum_multiplier_; // Vector of ones for summing via dot prod
+ Tensor total_weight_ptr_;
Tensor scratch_{Context::GetDeviceType()};
};
protected:
float scale_;
Tensor sum_multiplier_{Context::GetDeviceType()};
- Tensor weights_{Context::GetDeviceType()}; // unignored weights
- Tensor total_weight_ptr_{Context::GetDeviceType()};
+ Tensor weights_; // unignored weights
+ Tensor total_weight_ptr_;
StorageOrder order_;
bool only_loss_;
Tensor scratch_{Context::GetDeviceType()};
int axis_;
// Buffers for CUDAContext.
- Tensor input_transposed_buffer_{CUDA};
- Tensor values_transposed_buffer_{CUDA};
- Tensor indices_transposed_buffer_{CUDA};
+ Tensor input_transposed_buffer_;
+ Tensor values_transposed_buffer_;
+ Tensor indices_transposed_buffer_;
// Shape tensors on device for CUDAContext.
Tensor input_dims_device_{CUDA};
static_cast<int>(inner_size),
static_cast<int>(next_size)};
const std::array<int, 3> axes = {0, 2, 1};
- input_transposed_buffer_.Resize(
- std::vector<int64_t>{outer_size, inner_size});
- values_transposed_buffer_.Resize(std::vector<int64_t>{outer_size, k_});
- indices_transposed_buffer_.Resize(std::vector<int64_t>{outer_size, k_});
+ ReinitializeTensor(
+     &input_transposed_buffer_,
+     std::vector<int64_t>{outer_size, inner_size},
+     at::dtype<T>().device(CUDA));
+ ReinitializeTensor(
+     &values_transposed_buffer_,
+     std::vector<int64_t>{outer_size, k_},
+     at::dtype<T>().device(CUDA));
+ ReinitializeTensor(
+     &indices_transposed_buffer_,
+     std::vector<int64_t>{outer_size, k_},
+     at::dtype<int64_t>().device(CUDA));
math::Transpose(
3,
dims.data(),
// Add bias term
if (bias_multiplier_.numel() != batch_size) {
// If the helper bias multiplier does not have batch_size elements, reshape
// it and fill it with ones.
- bias_multiplier_.Resize(batch_size);
+ ReinitializeTensor(
+ &bias_multiplier_,
+ {batch_size},
+ at::dtype<T>().device(Context::GetDeviceType()));
math::Set<T, Context>(
batch_size,
static_cast<T>(1),
}
protected:
- Tensor bias_multiplier_{Context::GetDeviceType()};
+ Tensor bias_multiplier_;
std::vector<int> inp_sizes_;
std::vector<int> out_sizes_;
std::vector<int> tt_ranks_;
}
const T* input = inputTensor.template data<T>();
- thrust_unique_buffer_.Resize(N);
+ ReinitializeTensor(&thrust_unique_buffer_, {N}, at::dtype<T>().device(Context::GetDeviceType()));
auto* buffer = thrust_unique_buffer_.template mutable_data<T>();
context_.CopyItemsSameDevice(inputTensor.meta(), N, input, buffer);
private:
vector<int> order_;
- Tensor thrust_unique_buffer_{Context::GetDeviceType()};
+ Tensor thrust_unique_buffer_;
Tensor cuda_order_buffer_{Context::GetDeviceType()};
Tensor second_order_buffer_{Context::GetDeviceType()};
const size_t N = X.size();
const float* data_ptr = X.data<float>();
- scratch_.Resize(1);
+ ReinitializeTensor(&scratch_, {1}, at::dtype<bool>().device(CUDA));
math::Set<bool, CUDAContext>(
1, false, scratch_.mutable_data<bool>(), &context_);
NanCheckKernel<<<
// consecutively in device memory, copy pointers to a host vector and then
// copy back into a device array.
const int64_t B = (InputSize() - 3) / 2;
- x_data_host_.Resize(B);
- weights_host_.Resize(B);
- x_data_device_.Resize(B);
- weights_device_.Resize(B);
+ ReinitializeTensor(&x_data_host_, {B}, at::dtype<const float*>().device(CPU));
+ ReinitializeTensor(&weights_host_, {B}, at::dtype<const float*>().device(CPU));
+ ReinitializeTensor(&x_data_device_, {B}, at::dtype<const float*>().device(CUDA));
+ ReinitializeTensor(&weights_device_, {B}, at::dtype<const float*>().device(CUDA));
const float** x_data_host = x_data_host_.mutable_data<const float*>();
const float** weights_host = weights_host_.mutable_data<const float*>();
private:
TensorPrinter tensorPrinter_;
- Tensor scratch_{Context::GetDeviceType()};
+ Tensor scratch_;
};
struct GetNanCheckGradient : public GradientMakerBase {
}
return true;
}
- Tensor x_data_host_{CPU};
- Tensor weights_host_{CPU};
- Tensor x_data_device_{Context::GetDeviceType()};
- Tensor weights_device_{Context::GetDeviceType()};
+ Tensor x_data_host_;
+ Tensor weights_host_;
+ Tensor x_data_device_;
+ Tensor weights_device_;
};
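// Sketch of the pointer-staging pattern above (illustrative; how each device
// pointer is obtained is elided):
//   const float** xs = x_data_host_.mutable_data<const float*>();
//   for (int64_t i = 0; i < B; ++i) {
//     xs[i] = /* device pointer of the i-th input */;
//   }
//   context_.CopyFromCPU<const float*>(
//       B, xs, x_data_device_.mutable_data<const float*>());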
/**
if (batch_size > 0 && weights_dim > 0) {
auto* out_idx = Output(0, {batch_size, 1}, at::dtype<int>());
- unif_samples_.Resize(batch_size);
+ ReinitializeTensor(&unif_samples_, {batch_size}, at::dtype<float>().device(CUDA));
const float* in_weights_data = in_weights.data<float>();
const float* in_val_data = nullptr;
private:
vector<float> cum_mass_;
- Tensor unif_samples_{Context::GetDeviceType()};
+ Tensor unif_samples_;
};
} // namespace caffe2
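// The recurring migration pattern in this change, side by side (illustrative):
//
//   // before: device pinned at construction; dtype fixed only at the first
//   // mutable_data<T>() call
//   Tensor buf_{CPU};
//   buf_.Resize(n);
//
//   // after: no allocation until first use; shape, dtype, and device are all
//   // explicit at the reinitialization site
//   Tensor buf_;
//   ReinitializeTensor(&buf_, {n}, at::dtype<float>().device(CPU));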