Tensor construction codemod(ResizeLike) - 2/3 (#15940)

author Jerry Zhang <jerryzh@fb.com>
Sat, 12 Jan 2019 01:39:11 +0000 (17:39 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Sat, 12 Jan 2019 01:41:48 +0000 (17:41 -0800)

diff --git a/caffe2/operators/find_op.cu b/caffe2/operators/find_op.cu

index da6061e..5664e12 100644 (file)
--- a/caffe2/operators/find_op.cu
+++ b/caffe2/operators/find_op.cu
@@ -33,8 +33,8 @@ template <typename T>
  bool FindOp<CUDAContext>::DoRunWithType() {
    auto& idx = Input(0);
    auto& needles = Input(1);
-  auto* res_indices = Output(0);
-  res_indices->ResizeLike(needles);
+
+  auto* res_indices = Output(0, needles.sizes(), at::dtype<int>());
  
    const T* idx_data = idx.data<T>();
    const T* needles_data = needles.data<T>();
diff --git a/caffe2/operators/floor_op.cu b/caffe2/operators/floor_op.cu

index 41723d8..c2e34d7 100644 (file)
--- a/caffe2/operators/floor_op.cu
+++ b/caffe2/operators/floor_op.cu
@@ -14,9 +14,9 @@ __global__ void FloorKernel(const int N, const T* X, T* Y) {
  template <>
  bool FloorOp<float, CUDAContext>::RunOnDevice() {
    auto& X = Input(0);
-  auto* Y = Output(0);
+
    CAFFE_ENFORCE_GT(X.size(), 0);
-  Y->ResizeLike(X);
+  auto* Y = Output(0, X.sizes(), at::dtype<float>());
    FloorKernel<<<
        CAFFE_GET_BLOCKS(X.size()),
        CAFFE_CUDA_NUM_THREADS,
diff --git a/caffe2/operators/half_float_ops.cu b/caffe2/operators/half_float_ops.cu

index fdcb036..9dba9d6 100644 (file)
--- a/caffe2/operators/half_float_ops.cu
+++ b/caffe2/operators/half_float_ops.cu
@@ -22,8 +22,8 @@ __global__ void HalfToFloatKernel(const int N, const half* X, float* Y) {
  template <>
  bool FloatToHalfOp<CUDAContext>::RunOnDevice() {
    auto& X = Input(0);
-  auto* Y = Output(0);
-  Y->ResizeLike(X);
+
+  auto* Y = Output(0, X.sizes(), at::dtype<at::Half>());
    FloatToHalfKernel<<<
        CAFFE_GET_BLOCKS(X.size()),
        CAFFE_CUDA_NUM_THREADS,
@@ -38,8 +38,8 @@ bool FloatToHalfOp<CUDAContext>::RunOnDevice() {
  template <>
  bool HalfToFloatOp<CUDAContext>::RunOnDevice() {
    auto& X = Input(0);
-  auto* Y = Output(0);
-  Y->ResizeLike(X);
+
+  auto* Y = Output(0, X.sizes(), at::dtype<float>());
    HalfToFloatKernel<<<
        CAFFE_GET_BLOCKS(X.size()),
        CAFFE_CUDA_NUM_THREADS,
diff --git a/caffe2/operators/instance_norm_op.cu b/caffe2/operators/instance_norm_op.cu

index 8796684..475ddcf 100644 (file)
--- a/caffe2/operators/instance_norm_op.cu
+++ b/caffe2/operators/instance_norm_op.cu
@@ -187,7 +187,7 @@ bool InstanceNormOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
    const auto& input = Input(INPUT);
    const auto& scale = Input(SCALE);
    const auto& bias = Input(BIAS);
-  auto output = Output(OUTPUT);
+
    auto mean = OutputSize() >= 2 ? Output(MEAN) : &mean_;
    auto inv_stdev = OutputSize() >= 3 ? Output(INV_STDEV) : &inv_stdev_;
    CAFFE_ENFORCE_EQ(4, input.ndim());
@@ -199,7 +199,7 @@ bool InstanceNormOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
    CAFFE_ENFORCE_EQ(C, scale.dim32(0));
    CAFFE_ENFORCE_EQ(1, bias.ndim());
    CAFFE_ENFORCE_EQ(C, bias.dim32(0));
-  output->ResizeLike(input);
+  auto output = Output(OUTPUT, input.sizes(), at::dtype<float>());
    mean->Resize(N, C);
    inv_stdev->Resize(N, C);
  
@@ -264,7 +264,7 @@ bool InstanceNormOp<float, CUDAContext>::RunOnDeviceWithOrderNCHW() {
    const auto& input = Input(INPUT);
    const auto& scale = Input(SCALE);
    const auto& bias = Input(BIAS);
-  auto output = Output(OUTPUT);
+
    auto mean = OutputSize() >= 2 ? Output(MEAN) : &mean_;
    auto inv_stdev = OutputSize() >= 3 ? Output(INV_STDEV) : &inv_stdev_;
    CAFFE_ENFORCE_EQ(4, input.ndim());
@@ -276,7 +276,7 @@ bool InstanceNormOp<float, CUDAContext>::RunOnDeviceWithOrderNCHW() {
    CAFFE_ENFORCE_EQ(C, scale.dim32(0));
    CAFFE_ENFORCE_EQ(1, bias.ndim());
    CAFFE_ENFORCE_EQ(C, bias.dim32(0));
-  output->ResizeLike(input);
+  auto output = Output(OUTPUT, input.sizes(), at::dtype<float>());
    mean->Resize(N, C);
    inv_stdev->Resize(N, C);
  
@@ -344,9 +344,7 @@ bool InstanceNormGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
    const auto& output_grad = Input(OUTPUT_GRAD);
    const auto& mean = InputSize() >= 5 ? Input(MEAN) : mean_;
    const auto& inv_stdev = InputSize() >= 6 ? Input(INV_STDEV) : inv_stdev_;
-  auto input_grad = Output(INPUT_GRAD);
-  auto scale_grad = Output(SCALE_GRAD);
-  auto bias_grad = Output(BIAS_GRAD);
+
    CAFFE_ENFORCE_EQ(4, input.ndim());
    const int N = input.dim32(0);
    const int H = input.dim32(1);
@@ -361,9 +359,9 @@ bool InstanceNormGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
    CAFFE_ENFORCE_EQ(H, output_grad.dim32(1));
    CAFFE_ENFORCE_EQ(W, output_grad.dim32(2));
    CAFFE_ENFORCE_EQ(C, output_grad.dim32(3));
-  input_grad->ResizeLike(input);
-  scale_grad->ResizeLike(scale);
-  bias_grad->ResizeLike(bias);
+  auto input_grad = Output(INPUT_GRAD, input.sizes(), at::dtype<float>());
+  auto scale_grad = Output(SCALE_GRAD, scale.sizes(), at::dtype<float>());
+  auto bias_grad = Output(BIAS_GRAD, bias.sizes(), at::dtype<float>());
  
    const auto input_data = input.data<float>();
    const auto scale_data = scale.data<float>();
@@ -475,9 +473,7 @@ bool InstanceNormGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNCHW() {
    const auto& output_grad = Input(OUTPUT_GRAD);
    const auto& mean = InputSize() >= 5 ? Input(MEAN) : mean_;
    const auto& inv_stdev = InputSize() >= 6 ? Input(INV_STDEV) : inv_stdev_;
-  auto input_grad = Output(INPUT_GRAD);
-  auto scale_grad = Output(SCALE_GRAD);
-  auto bias_grad = Output(BIAS_GRAD);
+
    CAFFE_ENFORCE_EQ(4, input.ndim());
    const int N = input.dim32(0);
    const int C = input.dim32(1);
@@ -492,9 +488,9 @@ bool InstanceNormGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNCHW() {
    CAFFE_ENFORCE_EQ(C, output_grad.dim32(1));
    CAFFE_ENFORCE_EQ(H, output_grad.dim32(2));
    CAFFE_ENFORCE_EQ(W, output_grad.dim32(3));
-  input_grad->ResizeLike(input);
-  scale_grad->ResizeLike(scale);
-  bias_grad->ResizeLike(bias);
+  auto input_grad = Output(INPUT_GRAD, input.sizes(), at::dtype<float>());
+  auto scale_grad = Output(SCALE_GRAD, scale.sizes(), at::dtype<float>());
+  auto bias_grad = Output(BIAS_GRAD, bias.sizes(), at::dtype<float>());
  
    const auto input_data = input.data<float>();
    const auto scale_data = scale.data<float>();
diff --git a/caffe2/operators/integral_image_op.cu b/caffe2/operators/integral_image_op.cu

index 1e2b710..4db5370 100644 (file)
--- a/caffe2/operators/integral_image_op.cu
+++ b/caffe2/operators/integral_image_op.cu
@@ -119,7 +119,7 @@ __global__ void ColPassGradientKernel(
  template <>
  bool IntegralImageOp<float, CUDAContext>::RunOnDevice() {
    auto& X = Input(0);
-  
+
    CAFFE_ENFORCE(X.ndim() == 4, "Only supports 4D tensors for the momement");
  
    // Input is (N, C, H, W)
@@ -165,10 +165,11 @@ bool IntegralImageGradientOp<float, CUDAContext>::RunOnDevice() {
    auto& X = Input(0); // Original input to "forward" op
    auto& dY = Input(1); // Gradient of net w.r.t. output of "forward" op
                         // (aka "gradOutput")
-  auto* dX = Output(0); // Gradient of net w.r.t. input to
-                        // "forward" op (aka "gradInput")
  
-  dX->ResizeLike(X);
+  auto* dX = Output(
+      0, X.sizes(), at::dtype<float>()); // Gradient of net w.r.t. input to
+                                         // "forward" op (aka "gradInput")
+
    // Row pass reduces shape of dY from (N, C, H + 1, W + 1)
    // to (N, C, H + 1, W)
    // Col pass reduces shape to (N, C, H, W)
diff --git a/caffe2/operators/leaky_relu_op.cu b/caffe2/operators/leaky_relu_op.cu

index 95429e6..f35142e 100644 (file)
--- a/caffe2/operators/leaky_relu_op.cu
+++ b/caffe2/operators/leaky_relu_op.cu
@@ -29,8 +29,8 @@ template <>
  bool LeakyReluOp<float, CUDAContext>::RunOnDevice() {
    const auto& X = Input(0);
    CAFFE_ENFORCE_GT(X.size(), 0);
-  auto* Y = Output(0);
-  Y->ResizeLike(X);
+
+  auto* Y = Output(0, X.sizes(), at::dtype<float>());
    LeakyReluKernel<<<
        CAFFE_GET_BLOCKS(X.size()),
        CAFFE_CUDA_NUM_THREADS,
@@ -44,8 +44,8 @@ template <>
  bool LeakyReluGradientOp<float, CUDAContext>::RunOnDevice() {
    const auto& Y = Input(0);
    const auto& dY = Input(1);
-  auto* dX = Output(0);
-  dX->ResizeLike(Y);
+
+  auto* dX = Output(0, Y.sizes(), at::dtype<float>());
    CAFFE_ENFORCE_EQ(Y.size(), dY.size());
    LeakyReluGradientKernel<<<
        CAFFE_GET_BLOCKS(Y.size()),
diff --git a/caffe2/operators/local_response_normalization_op.cu b/caffe2/operators/local_response_normalization_op.cu

index edcd8e8..7e33553 100644 (file)
--- a/caffe2/operators/local_response_normalization_op.cu
+++ b/caffe2/operators/local_response_normalization_op.cu
@@ -178,14 +178,14 @@ __global__ void LRNComputeDiffNHWC(const int nthreads, const T* bottom_data,
  template<>
  bool LRNOp<float, CUDAContext>::RunOnDeviceWithOrderNCHW() {
    auto& X = Input(0);
-  auto* Y = Output(0);
+
    DCHECK_EQ(X.ndim(), 4);
    const int N = X.dim32(0);
    const int C = X.dim32(1);
    const int H = X.dim32(2);
    const int W = X.dim32(3);
    const float* Xdata = X.data<float>();
-  Y->ResizeLike(X);
+  auto* Y = Output(0, X.sizes(), at::dtype<float>());
    float* Ydata = Y->template mutable_data<float>();
    if (OutputSize() > 1) {
      scale_ = Output(1);
@@ -211,14 +211,14 @@ bool LRNOp<float, CUDAContext>::RunOnDeviceWithOrderNCHW() {
  template<>
  bool LRNOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
    auto& X = Input(0);
-  auto* Y = Output(0);
+
    DCHECK_EQ(X.ndim(), 4);
    const int N = X.dim32(0);
    const int H = X.dim32(1);
    const int W = X.dim32(2);
    const int C = X.dim32(3);
    const float* Xdata = X.data<float>();
-  Y->ResizeLike(X);
+  auto* Y = Output(0, X.sizes(), at::dtype<float>());
    float* Ydata = Y->template mutable_data<float>();
    if (OutputSize() > 1) {
      scale_ = Output(1);
@@ -245,7 +245,7 @@ bool LRNGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNCHW() {
    auto& X = Input(0);
    auto& Y = Input(1);
    auto& dY = Input(2);
-  auto* dX = Output(0);
+
    DCHECK_EQ(X.ndim(), 4);
    const int N = X.dim32(0);
    const int C = X.dim32(1);
@@ -255,7 +255,7 @@ bool LRNGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNCHW() {
    // long as the sizes check out.
    DCHECK_EQ(X.size(), Y.size());
    DCHECK_EQ(X.size(), dY.size());
-  dX->ResizeLike(X);
+  auto* dX = Output(0, X.sizes(), at::dtype<float>());
  
    const float* Xdata = X.data<float>();
    const float* Ydata = Y.data<float>();
@@ -284,7 +284,7 @@ bool LRNGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
    auto& X = Input(0);
    auto& Y = Input(1);
    auto& dY = Input(2);
-  auto* dX = Output(0);
+
    DCHECK_EQ(X.ndim(), 4);
    const int N = X.dim32(0);
    const int H = X.dim32(1);
@@ -295,7 +295,7 @@ bool LRNGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
    // long as the sizes check out.
    DCHECK_EQ(X.size(), Y.size());
    DCHECK_EQ(X.size(), dY.size());
-  dX->ResizeLike(X);
+  auto* dX = Output(0, X.sizes(), at::dtype<float>());
    if (!scale_) {
      scale_ = &local_scale_tensor_;
    }
diff --git a/caffe2/operators/lp_pool_op.cu b/caffe2/operators/lp_pool_op.cu

index 2564198..1547b00 100644 (file)
--- a/caffe2/operators/lp_pool_op.cu
+++ b/caffe2/operators/lp_pool_op.cu
@@ -279,8 +279,8 @@ bool PoolGradientOp<float, CUDAContext, LpPoolFunctor>::
    auto& Y = Input(1);
    auto& dY = Input(2);
    CAFFE_ENFORCE_EQ(dY.ndim(), 4);
-  auto* dX = Output(0);
-  dX->ResizeLike(X);
+
+  auto* dX = Output(0, X.sizes(), at::dtype<float>());
    ConvPoolOpBase<CUDAContext>::ComputePads({X.dim32(2), X.dim32(3)});
    LpPoolBackwardNCHW<float>
        <<<CAFFE_GET_BLOCKS(X.size()),
@@ -315,8 +315,8 @@ bool PoolGradientOp<float, CUDAContext, LpPoolFunctor>::
    auto& Y = Input(1);
    auto& dY = Input(2);
    CAFFE_ENFORCE_EQ(dY.ndim(), 4);
-  auto* dX = Output(0);
-  dX->ResizeLike(X);
+
+  auto* dX = Output(0, X.sizes(), at::dtype<float>());
    ConvPoolOpBase<CUDAContext>::ComputePads({X.dim32(1), X.dim32(2)});
    LpPoolBackwardNHWC<float>
        <<<CAFFE_GET_BLOCKS(X.size()),
diff --git a/caffe2/operators/margin_ranking_criterion_op.cu b/caffe2/operators/margin_ranking_criterion_op.cu

index 5593a1d..a61b873 100644 (file)
--- a/caffe2/operators/margin_ranking_criterion_op.cu
+++ b/caffe2/operators/margin_ranking_criterion_op.cu
@@ -33,14 +33,14 @@ bool MarginRankingCriterionOp<CUDAContext>::RunOnDevice() {
    auto& X1 = Input(0);
    auto& X2 = Input(1);
    auto& Y = Input(2);
-  auto* loss = Output(0);
+
    CAFFE_ENFORCE(
        X1.size() == X2.size(),
        "The two inputs for computing ranking loss should have the same size.");
    CAFFE_ENFORCE(
        X1.size() == Y.size(),
        "The input and label should have the same size.");
-  loss->ResizeLike(X1);
+  auto* loss = Output(0, X1.sizes(), at::dtype<float>());
  
    const float* X1data = X1.data<float>();
    const float* X2data = X2.data<float>();
@@ -59,11 +59,9 @@ bool MarginRankingCriterionGradientOp<CUDAContext>::RunOnDevice() {
    auto& X2 = Input(1);
    auto& Y = Input(2);
    auto& dOutput = Input(3);
-  auto* dX1 = Output(0);
-  auto* dX2 = Output(1);
  
-  dX1->ResizeLike(X1);
-  dX2->ResizeLike(X2);
+  auto* dX1 = Output(0, X1.sizes(), at::dtype<float>());
+  auto* dX2 = Output(1, X2.sizes(), at::dtype<float>());
  
    const float* X1data = X1.data<float>();
    const float* X2data = X2.data<float>();
diff --git a/caffe2/operators/piecewise_linear_transform_op.cu b/caffe2/operators/piecewise_linear_transform_op.cu

index 1d3e850..8b6b9f7 100644 (file)
--- a/caffe2/operators/piecewise_linear_transform_op.cu
+++ b/caffe2/operators/piecewise_linear_transform_op.cu
@@ -194,11 +194,11 @@ void PiecewiseLinearTransformOp<float, CUDAContext>::setUpTensors(
  template <>
  bool PiecewiseLinearTransformOp<float, CUDAContext>::TransformGeneral() {
    auto& X = Input(0);
-  auto* Y = Output(0);
+
    CAFFE_ENFORCE_EQ(X.ndim(), 2);
    int64_t N = X.dim32(0);
    int64_t M = X.dim32(1);
-  Y->ResizeLike(X);
+  auto* Y = Output(0, X.sizes(), at::dtype<float>());
  
    int64_t num_func_per_group;
    int64_t num_group;
@@ -226,14 +226,14 @@ bool PiecewiseLinearTransformOp<float, CUDAContext>::TransformGeneral() {
  template <>
  bool PiecewiseLinearTransformOp<float, CUDAContext>::TransformBinary() {
    auto& X = Input(0);
-  auto* Y = Output(0);
+
    CAFFE_ENFORCE(X.ndim() == 1 || X.ndim() == 2);
    int64_t N = X.dim32(0);
    int64_t M = X.ndim() == 2 ? X.dim32(1) : 1;
    CAFFE_ENFORCE(
        M == 1 || M == 2,
        "If binary is set to true, the input must be Nx2 or Nx1 tensor");
-  Y->ResizeLike(X);
+  auto* Y = Output(0, X.sizes(), at::dtype<float>());
  
    int64_t num_func_per_group;
    int64_t num_group;
diff --git a/caffe2/operators/pool_op.cu b/caffe2/operators/pool_op.cu

index e0b6534..2a18be9 100644 (file)
--- a/caffe2/operators/pool_op.cu
+++ b/caffe2/operators/pool_op.cu
@@ -744,8 +744,8 @@ bool PoolGradientOp<float, CUDAContext, AveragePool>::
    auto& X = Input(0);
    auto& dY = Input(2);
    CAFFE_ENFORCE_EQ(dY.dim32(1), X.dim32(1));
-  auto* dX = Output(0);
-  dX->ResizeLike(X);
+
+  auto* dX = Output(0, X.sizes(), at::dtype<float>());
    vector<int> dims(X.sizes().begin() + 2, X.sizes().end());
    ConvPoolOpBase<CUDAContext>::ComputePads(dims);
    switch (kernel_.size()) {
@@ -828,8 +828,8 @@ bool PoolGradientOp<float, CUDAContext, AveragePool>::
    auto& dY = Input(2);
    CAFFE_ENFORCE_EQ(X.ndim(), dY.ndim());
    CAFFE_ENFORCE_EQ(X.dim32(X.ndim() - 1), dY.dim32(dY.ndim() - 1));
-  auto* dX = Output(0);
-  dX->ResizeLike(X);
+
+  auto* dX = Output(0, X.sizes(), at::dtype<float>());
    vector<int> dims(X.sizes().begin() + 1, X.sizes().end() - 1);
    ConvPoolOpBase<CUDAContext>::ComputePads(dims);
    switch (kernel_.size()) {
@@ -1577,8 +1577,8 @@ bool PoolGradientOp<float, CUDAContext, MaxPool>::RunOnDeviceWithOrderNCHW() {
    auto& Y = Input(1);
    auto& dY = Input(2);
    CAFFE_ENFORCE_EQ(dY.ndim(), X.ndim());
-  auto* dX = Output(0);
-  dX->ResizeLike(X);
+
+  auto* dX = Output(0, X.sizes(), at::dtype<float>());
    vector<int> dims(X.sizes().begin() + 2, X.sizes().end());
    ConvPoolOpBase<CUDAContext>::ComputePads(dims);
    switch (kernel_.size()) {
@@ -1666,8 +1666,8 @@ bool PoolGradientOp<float, CUDAContext, MaxPool>::RunOnDeviceWithOrderNHWC() {
    auto& Y = Input(1);
    auto& dY = Input(2);
    CAFFE_ENFORCE_EQ(dY.ndim(), X.ndim());
-  auto* dX = Output(0);
-  dX->ResizeLike(X);
+
+  auto* dX = Output(0, X.sizes(), at::dtype<float>());
    vector<int> dims(X.sizes().begin() + 1, X.sizes().end() - 1);
    ConvPoolOpBase<CUDAContext>::ComputePads(dims);
    switch (kernel_.size()) {
diff --git a/caffe2/operators/pool_op_cudnn.cu b/caffe2/operators/pool_op_cudnn.cu

index 0a6ede3..b521d34 100644 (file)
--- a/caffe2/operators/pool_op_cudnn.cu
+++ b/caffe2/operators/pool_op_cudnn.cu
@@ -339,12 +339,11 @@ class CuDNNPoolGradientOp : public ConvPoolOpBase<CUDAContext> {
      auto& X = Input(0);
      auto& Y = Input(1);
      auto& dY = Input(2);
-    auto* dX = Output(0);
  
      // cuDNN pooling support only 2 and 3 spatial dimensions.
      CAFFE_ENFORCE(X.ndim() >= 4 && X.ndim() <= 5);
  
-    dX->ResizeLike(X);
+    auto* dX = Output(0, X.sizes(), at::dtype<float>());
      int N = 0, C = 0, H = 0, W = 0, D = 0;
      int H_out = 0, W_out = 0, D_out = 0;
      switch (order_) {
diff --git a/caffe2/operators/prelu_op.cu b/caffe2/operators/prelu_op.cu

index 9cf5d58..d2a543c 100644 (file)
--- a/caffe2/operators/prelu_op.cu
+++ b/caffe2/operators/prelu_op.cu
@@ -150,8 +150,8 @@ template <>
  bool PReluOp<float, CUDAContext>::RunOnDevice() {
    const auto& X = Input(0);
    const auto& W = Input(1);
-  auto* Y = Output(0);
-  Y->ResizeLike(X);
+
+  auto* Y = Output(0, X.sizes(), at::dtype<float>());
    const auto* Xdata = X.data<float>();
    const auto* Wdata = W.data<float>();
    auto* Ydata = Y->template mutable_data<float>();
@@ -207,12 +207,10 @@ bool PReluGradientOp<float, CUDAContext>::RunOnDevice() {
    auto& W = Input(3);
  
    CAFFE_ENFORCE(&Y != &X, "Cannot backpropagate through an in-place PReLU");
-  auto* dX = Output(0);
-  auto* dW = Output(1);
  
    DCHECK_EQ(dY.size(), Y.size());
-  dX->ResizeLike(Y);
-  dW->ResizeLike(W);
+  auto* dX = Output(0, Y.sizes(), at::dtype<float>());
+  auto* dW = Output(1, W.sizes(), at::dtype<float>());
  
    const auto C = order_ == StorageOrder::NCHW ? X.dim(1) : X.dim(X.ndim() - 1);
    const auto C_shared = (W.size() == 1);
diff --git a/caffe2/operators/reduction_ops.cu b/caffe2/operators/reduction_ops.cu

index 5a12eb2..a1249ef 100644 (file)
--- a/caffe2/operators/reduction_ops.cu
+++ b/caffe2/operators/reduction_ops.cu
@@ -84,8 +84,8 @@ bool SumElementsGradientOp<float, CUDAContext>::RunOnDevice() {
    auto& X = Input(0);
    auto& dY = Input(1);
    DCHECK_EQ(dY.size(), 1);
-  auto* dX = Output(0);
-  dX->ResizeLike(X);
+
+  auto* dX = Output(0, X.sizes(), at::dtype<float>());
    SumElementsGradientKernel<float>
        <<<CAFFE_GET_BLOCKS(X.size()),
           CAFFE_CUDA_NUM_THREADS,
@@ -104,8 +104,7 @@ bool MaxReductionGradientOp<T, Context, ROWWISE>::RunOnDevice() {
    auto& Y = Input(1);
    auto& dY = Input(2);
  
-  auto* dX = Output(0);
-  dX->ResizeLike(X);
+  auto* dX = Output(0, X.sizes(), at::dtype<T>());
  
    CAFFE_ENFORCE_EQ(X.ndim(), 3);
  
diff --git a/caffe2/operators/roi_align_gradient_op.cu b/caffe2/operators/roi_align_gradient_op.cu

index ca89a83..0d17e43 100644 (file)
--- a/caffe2/operators/roi_align_gradient_op.cu
+++ b/caffe2/operators/roi_align_gradient_op.cu
@@ -193,10 +193,10 @@ bool RoIAlignGradientOp<float, CUDAContext>::RunOnDevice() {
    auto& R = Input(1); // RoIs
    auto& dY = Input(2); // Gradient of net w.r.t. output of "forward" op
                         // (aka "gradOutput")
-  auto* dX = Output(0); // Gradient of net w.r.t. input to
-                        // "forward" op (aka "gradInput")
  
-  dX->ResizeLike(X);
+  auto* dX = Output(
+      0, X.sizes(), at::dtype<float>()); // Gradient of net w.r.t. input to
+                                         // "forward" op (aka "gradInput")
  
    // Must zero-out dX before accumulating gradients
    // (TODO): Kaiming - is this safe?
diff --git a/caffe2/operators/roi_align_rotated_gradient_op.cu b/caffe2/operators/roi_align_rotated_gradient_op.cu

index 03bc980..4148d1a 100644 (file)
--- a/caffe2/operators/roi_align_rotated_gradient_op.cu
+++ b/caffe2/operators/roi_align_rotated_gradient_op.cu
@@ -199,10 +199,10 @@ bool RoIAlignRotatedGradientOp<float, CUDAContext>::RunOnDevice() {
    auto& R = Input(1); // RoIs
    auto& dY = Input(2); // Gradient of net w.r.t. output of "forward" op
                         // (aka "gradOutput")
-  auto* dX = Output(0); // Gradient of net w.r.t. input to "forward" op
-                        // (aka "gradInput")
  
-  dX->ResizeLike(X);
+  auto* dX = Output(
+      0, X.sizes(), at::dtype<float>()); // Gradient of net w.r.t. input to
+                                         // "forward" op (aka "gradInput")
  
    // Must zero-out dX before accumulating gradients
    math::Set<float, CUDAContext>(
diff --git a/caffe2/operators/roi_pool_op.cu b/caffe2/operators/roi_pool_op.cu

index 65b1702..1154997 100644 (file)
--- a/caffe2/operators/roi_pool_op.cu
+++ b/caffe2/operators/roi_pool_op.cu
@@ -173,10 +173,10 @@ bool RoIPoolGradientOp<float, CUDAContext>::RunOnDevice() {
    auto& A = Input(2); // argmaxes
    auto& dY = Input(3); // Gradient of net w.r.t. output of "forward" op
    // (aka "gradOutput")
-  auto* dX = Output(0); // Gradient of net w.r.t. input to "forward" op
-  // (aka "gradInput")
  
-  dX->ResizeLike(X);
+  auto* dX = Output(
+      0, X.sizes(), at::dtype<float>()); // Gradient of net w.r.t. input to
+                                         // "forward" op (aka "gradInput")
    // Must zero-out dX before accumulating gradients
    math::Set<float, CUDAContext>(
        dX->size(), 0.f, dX->template mutable_data<float>(), &context_);
diff --git a/caffe2/operators/segment_reduction_op_gpu.cu b/caffe2/operators/segment_reduction_op_gpu.cu

index 4846c75..d79220c 100644 (file)
--- a/caffe2/operators/segment_reduction_op_gpu.cu
+++ b/caffe2/operators/segment_reduction_op_gpu.cu
@@ -1189,8 +1189,8 @@ class SortedSegmentRangeMeanGradientOp : public Operator<Context> {
      const auto& Y = Input(1);
      const auto& dY = Input(2);
      const auto& I = Input(3);
-    auto* dX = Output(0);
-    dX->ResizeLike(X);
+
+    auto* dX = Output(0, X.sizes(), at::dtype<T>());
  
      const int M = X.dim32(0);
      const int N = X.size_from_dim(1);
@@ -1687,7 +1687,6 @@ class CUDASparseLengthsIndicesInGradientWeightedSumWithMainInputGradientOp
      auto& dataInput = Input(3);
      auto& indicesInput = Input(4);
  
-    auto* weightGradsOutput = Output(1);
      CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector");
      CAFFE_ENFORCE_EQ(1, weightsInput.ndim(), "WEIGHTS must be a vector");
  
@@ -1699,7 +1698,7 @@ class CUDASparseLengthsIndicesInGradientWeightedSumWithMainInputGradientOp
      int output_0dim = indicesInput.dim(0);
      shape[0] = output_0dim;
      auto* dataGradsOutput = Output(0, shape, at::dtype<T>());
-    weightGradsOutput->ResizeLike(indicesInput);
+    auto* weightGradsOutput = Output(1, indicesInput.sizes(), at::dtype<T>());
      T* out_data_grads = dataGradsOutput->template mutable_data<T>();
      T* out_weight_grads = weightGradsOutput->template mutable_data<T>();
  
diff --git a/caffe2/operators/selu_op.cu b/caffe2/operators/selu_op.cu

index 647c518..222a3de 100644 (file)
--- a/caffe2/operators/selu_op.cu
+++ b/caffe2/operators/selu_op.cu
@@ -30,9 +30,9 @@ __global__ void SeluGradientKernel(
  template <>
  bool SeluOp<float, CUDAContext>::RunOnDevice() {
    auto& X = Input(0);
-  auto* Y = Output(0);
+
    CAFFE_ENFORCE_GT(X.size(), 0);
-  Y->ResizeLike(X);
+  auto* Y = Output(0, X.sizes(), at::dtype<float>());
    SeluKernel<float>
        <<<CAFFE_GET_BLOCKS(X.size()),
           CAFFE_CUDA_NUM_THREADS,
@@ -50,10 +50,10 @@ template <>
  bool SeluGradientOp<float, CUDAContext>::RunOnDevice() {
    auto& Y = Input(0);
    auto& dY = Input(1);
-  auto* dX = Output(0);
+
    CAFFE_ENFORCE_GT(Y.size(), 0);
    CAFFE_ENFORCE_EQ(dY.size(), Y.size());
-  dX->ResizeLike(Y);
+  auto* dX = Output(0, Y.sizes(), at::dtype<float>());
    SeluGradientKernel<float>
        <<<CAFFE_GET_BLOCKS(Y.size()),
           CAFFE_CUDA_NUM_THREADS,
diff --git a/caffe2/operators/softmax_ops.cu b/caffe2/operators/softmax_ops.cu

index 44fc80b..a58ebc8 100644 (file)
--- a/caffe2/operators/softmax_ops.cu
+++ b/caffe2/operators/softmax_ops.cu
@@ -284,15 +284,17 @@ template<>
  bool SoftmaxWithLossOp<float, CUDAContext>::RunOnDevice() {
    auto& X = Input(0);  // Logits
    auto& T = Input(1);  // Labels / targets
-  auto* P = Output(0); // Probabilities from softmax
  
    const float* weights = (InputSize() > 2 ? Input(2).data<float>() : NULL);
    const auto canonical_axis = X.canonical_axis_index(axis_);
    int N, D;
    N = X.size_to_dim(canonical_axis); // batch size
    D = X.size_from_dim(canonical_axis);
-  P->ResizeLike(X);
+
+  auto* P =
+      Output(0, X.sizes(), at::dtype<float>()); // Probabilities from softmax
    ReinitializeTensor(&total_weight_ptr_, {1}, at::dtype<float>().device(CUDA));
+  total_weight_ptr_.Resize(1);
  
    if (label_prob_mode_) {
      CAFFE_ENFORCE_GE(T.ndim(), 2);
@@ -391,14 +393,16 @@ template <>
  bool SpatialSoftmaxWithLossOp<float, CUDAContext>::RunOnDevice() {
    auto& X = Input(0); // Logits
    auto& T = Input(1); // Labels / targets
-  auto* P = Output(0); // Probabilities from softmax
  
    const float* weights = (InputSize() > 2 ? Input(2).data<float>() : NULL);
    int N, D;
    N = X.dim32(0);
    D = X.dim32(1);
-  P->ResizeLike(X);
+
+  auto* P =
+      Output(0, X.sizes(), at::dtype<float>()); // Probabilities from softmax
    ReinitializeTensor(&total_weight_ptr_, {1}, at::dtype<float>().device(CUDA));
+
    CAFFE_ENFORCE_EQ(X.ndim(), 4);
    CAFFE_ENFORCE_EQ(T.ndim(), 3);
    CAFFE_ENFORCE_EQ(T.dim32(0), N);
@@ -685,11 +689,11 @@ bool SpatialSoftmaxWithLossGradientOp<float, CUDAContext>::RunOnDevice() {
  template <>
  bool SoftmaxOp<float, CUDAContext>::RunOnDevice() {
    auto& X = Input(0);
-  auto* P = Output(0);
+
    const auto canonical_axis = X.canonical_axis_index(axis_);
    const int N = X.size_to_dim(canonical_axis);
    const int D = X.size_from_dim(canonical_axis);
-  P->ResizeLike(X);
+  auto* P = Output(0, X.sizes(), at::dtype<float>());
    auto* P_data = P->mutable_data<float>();
    if (N == 0) {
      return true;
@@ -760,11 +764,11 @@ template <>
  bool SoftmaxGradientOp<float, CUDAContext>::RunOnDevice() {
    auto& Y = Input(0);
    auto& dY = Input(1);
-  auto* dX = Output(0);
+
    const auto canonical_axis = Y.canonical_axis_index(axis_);
    const int N = Y.size_to_dim(canonical_axis);
    const int D = Y.size_from_dim(canonical_axis);
-  dX->ResizeLike(Y);
+  auto* dX = Output(0, Y.sizes(), at::dtype<float>());
    auto* dX_data = dX->mutable_data<float>();
    if (N == 0) {
      return true;
diff --git a/caffe2/operators/softplus_op.cu b/caffe2/operators/softplus_op.cu

index 569190a..7220102 100644 (file)
--- a/caffe2/operators/softplus_op.cu
+++ b/caffe2/operators/softplus_op.cu
@@ -23,9 +23,9 @@ SoftplusGradientKernel(const int N, const T* Y, const T* dY, T* dX) {
  template <>
  bool SoftplusOp<float, CUDAContext>::RunOnDevice() {
    auto& X = Input(0);
-  auto* Y = Output(0);
+
    DCHECK_GT(X.size(), 0);
-  Y->ResizeLike(X);
+  auto* Y = Output(0, X.sizes(), at::dtype<float>());
    SoftplusKernel<float>
        <<<CAFFE_GET_BLOCKS(X.size()),
           CAFFE_CUDA_NUM_THREADS,
@@ -39,10 +39,10 @@ template <>
  bool SoftplusGradientOp<float, CUDAContext>::RunOnDevice() {
    auto& Y = Input(0);
    auto& dY = Input(1);
-  auto* dX = Output(0);
+
    DCHECK_GT(Y.size(), 0);
    DCHECK_EQ(dY.size(), Y.size());
-  dX->ResizeLike(Y);
+  auto* dX = Output(0, Y.sizes(), at::dtype<float>());
    SoftplusGradientKernel<float>
        <<<CAFFE_GET_BLOCKS(Y.size()),
           CAFFE_CUDA_NUM_THREADS,
diff --git a/caffe2/operators/stump_func_op.cu b/caffe2/operators/stump_func_op.cu

index 9e38da2..d8df012 100644 (file)
--- a/caffe2/operators/stump_func_op.cu
+++ b/caffe2/operators/stump_func_op.cu
@@ -40,8 +40,8 @@ template <>
  bool StumpFuncOp<float, float, CUDAContext>::RunOnDevice() {
    auto& in = Input(0);
    const float* in_data = in.data<float>();
-  auto* out = Output(0);
-  out->ResizeLike(in);
+
+  auto* out = Output(0, in.sizes(), at::dtype<float>());
    float* out_data = out->template mutable_data<float>();
    StumpFuncKernel<<<CAFFE_GET_BLOCKS(in.size()), CAFFE_CUDA_NUM_THREADS,
      0, context_.cuda_stream()>>>(
diff --git a/caffe2/operators/thresholded_relu_op.cu b/caffe2/operators/thresholded_relu_op.cu

index 5a5027c..ece7117 100644 (file)
--- a/caffe2/operators/thresholded_relu_op.cu
+++ b/caffe2/operators/thresholded_relu_op.cu
@@ -22,9 +22,9 @@ ThresholdedReluGradientKernel(const int N, const T* Y, const T* dY, T* dX) {
  template <>
  bool ThresholdedReluOp<float, CUDAContext>::RunOnDevice() {
    auto& X = Input(0);
-  auto* Y = Output(0);
+
    CAFFE_ENFORCE_GT(X.size(), 0);
-  Y->ResizeLike(X);
+  auto* Y = Output(0, X.sizes(), at::dtype<float>());
    ThresholdedReluKernel<<<
        CAFFE_GET_BLOCKS(X.size()),
        CAFFE_CUDA_NUM_THREADS,
@@ -38,10 +38,10 @@ template <>
  bool ThresholdedReluGradientOp<float, CUDAContext>::RunOnDevice() {
    auto& Y = Input(0);
    auto& dY = Input(1);
-  auto* dX = Output(0);
+
    CAFFE_ENFORCE_GT(Y.size(), 0);
    CAFFE_ENFORCE_EQ(dY.size(), Y.size());
-  dX->ResizeLike(Y);
+  auto* dX = Output(0, Y.sizes(), at::dtype<float>());
    ThresholdedReluGradientKernel<<<
        CAFFE_GET_BLOCKS(Y.size()),
        CAFFE_CUDA_NUM_THREADS,
diff --git a/caffe2/operators/utility_ops.cu b/caffe2/operators/utility_ops.cu

index a87a039..7d639cf 100644 (file)
--- a/caffe2/operators/utility_ops.cu
+++ b/caffe2/operators/utility_ops.cu
@@ -217,8 +217,8 @@ bool SelectGradientOpBase<float, CUDAContext>::RunOnDevice() {
  
    for (int i = 0; i < OutputSize(); i++) {
      auto& input = Input(i + kInputStartOffset);
-    auto* grad_input = Output(i);
-    grad_input->ResizeLike(input);
+
+    auto* grad_input = Output(i, input.sizes(), at::dtype<float>());
      MaxMinGradKernel<<<
          CAFFE_GET_BLOCKS(input.size()),
          CAFFE_CUDA_NUM_THREADS,
author	Jerry Zhang <jerryzh@fb.com>
	Sat, 12 Jan 2019 01:39:11 +0000 (17:39 -0800)
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
	Sat, 12 Jan 2019 01:41:48 +0000 (17:41 -0800)
caffe2/operators/find_op.cu		patch \| blob \| history
caffe2/operators/floor_op.cu		patch \| blob \| history
caffe2/operators/half_float_ops.cu		patch \| blob \| history
caffe2/operators/instance_norm_op.cu		patch \| blob \| history
caffe2/operators/integral_image_op.cu		patch \| blob \| history
caffe2/operators/leaky_relu_op.cu		patch \| blob \| history
caffe2/operators/local_response_normalization_op.cu		patch \| blob \| history
caffe2/operators/lp_pool_op.cu		patch \| blob \| history
caffe2/operators/margin_ranking_criterion_op.cu		patch \| blob \| history
caffe2/operators/piecewise_linear_transform_op.cu		patch \| blob \| history
caffe2/operators/pool_op.cu		patch \| blob \| history
caffe2/operators/pool_op_cudnn.cu		patch \| blob \| history
caffe2/operators/prelu_op.cu		patch \| blob \| history
caffe2/operators/reduction_ops.cu		patch \| blob \| history
caffe2/operators/roi_align_gradient_op.cu		patch \| blob \| history
caffe2/operators/roi_align_rotated_gradient_op.cu		patch \| blob \| history
caffe2/operators/roi_pool_op.cu		patch \| blob \| history
caffe2/operators/segment_reduction_op_gpu.cu		patch \| blob \| history
caffe2/operators/selu_op.cu		patch \| blob \| history
caffe2/operators/softmax_ops.cu		patch \| blob \| history
caffe2/operators/softplus_op.cu		patch \| blob \| history
caffe2/operators/stump_func_op.cu		patch \| blob \| history
caffe2/operators/thresholded_relu_op.cu		patch \| blob \| history
caffe2/operators/utility_ops.cu		patch \| blob \| history