bool RunOnDevice() override {
auto& input = Input(0);
- auto* output = Output(0);
- if (output->sizes() != input.sizes()) {
- LOG(INFO) << "Reshaping and initializing output.";
- output->ResizeLike(input);
- math::Set<T, Context>(
- output->numel(), 0, output->template mutable_data<T>(), &context_);
- }
+
+ // TODO: the operator depends on output being set to 0 before the run
+ auto* output = Output(0, input.sizes(), at::dtype<T>());
math::Axpby<T, T, Context>(
input.numel(),
static_cast<T>(1),
bool AffineChannelGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNCHW() {
const auto& dY = Input(0);
const auto& scale = is_learnable_ ? Input(2) : Input(1);
- auto* dX = Output(0);
- dX->ResizeLike(dY);
+
+ auto* dX = Output(0, dY.sizes(), at::dtype<float>());
const int N = dY.dim32(0);
const int C = dY.dim32(1);
const int HxW = dY.size() / (N * C);
if (is_learnable_) {
const auto& X = Input(1);
const float* X_data = X.data<float>();
- auto* dscale = Output(1);
- auto* dbias = Output(2);
- dscale->ResizeLike(scale);
- dbias->ResizeLike(scale);
+
+ auto* dscale = Output(1, scale.sizes(), at::dtype<float>());
+ auto* dbias = Output(2, scale.sizes(), at::dtype<float>());
const int outer_size = N * HxW;
AffineChannelScaleBiasBackwardCUDAKernel<float, StorageOrder::NCHW>
<<<std::min(outer_size, CAFFE_MAXIMUM_NUM_BLOCKS),
bool AffineChannelGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
const auto& dY = Input(0);
const auto& scale = is_learnable_ ? Input(2) : Input(1);
- auto* dX = Output(0);
- dX->ResizeLike(dY);
+
+ auto* dX = Output(0, dY.sizes(), at::dtype<float>());
const int ndim = dY.ndim();
const int C = dY.dim32(ndim - 1);
const int rows = dY.size() / C;
const float* X_data = X.data<float>();
const int N = X.dim32(0);
const int HxW = rows / N;
- auto* dscale = Output(1);
- auto* dbias = Output(2);
- dscale->ResizeLike(scale);
- dbias->ResizeLike(scale);
+
+ auto* dscale = Output(1, scale.sizes(), at::dtype<float>());
+ auto* dbias = Output(2, scale.sizes(), at::dtype<float>());
AffineChannelScaleBiasBackwardCUDAKernel<float, StorageOrder::NHWC>
<<<std::min(rows, CAFFE_MAXIMUM_NUM_BLOCKS),
CAFFE_CUDA_NUM_THREADS,
auto& data = Input(DATA);
auto& indices = Input(INDICES);
auto& grad = Input(GRAD);
- auto* output = Output(0);
+
// ONNX allows negative axis to index from the back, valid range: [-r, r].
int axis = axis_;
data.size(acheck), grad.size(acheck), "batch sizes should be the same");
}
- output->ResizeLike(data);
+ auto* output = Output(0, data.sizes(), at::dtype<float>());
auto* out_data = output->template mutable_data<float>();
math::Set<float, CUDAContext>(output->size(), 0, out_data, &context_);
auto* destData = (uint8_t*)dest->raw_mutable_data(src.meta());
const auto* srcData = (uint8_t*)src.raw_data();
if (OutputSize() == 2) {
-
+
auto* indicesOut = Output(1, {numOfOutput}, at::dtype<int64_t>());
indicesOut->template mutable_data<int64_t>();
}
window_centers = &Input(1);
}
- auto* output = Output(0);
- output->ResizeLike(*input);
+ auto* output = Output(0, input->sizes(), at::dtype<T>());
const auto canonical_axis = input->canonical_axis_index(axis_);
template <>
bool CeilOp<float, CUDAContext>::RunOnDevice() {
auto& X = Input(0);
- auto* Y = Output(0);
+
CAFFE_ENFORCE_GT(X.size(), 0);
- Y->ResizeLike(X);
+ auto* Y = Output(0, X.sizes(), at::dtype<float>());
CeilKernel<<<
CAFFE_GET_BLOCKS(X.size()),
CAFFE_CUDA_NUM_THREADS,
template <>
bool ChannelShuffleOp<float, CUDAContext>::RunOnDeviceWithOrderNCHW() {
const auto& X = Input(0);
- auto* Y = Output(0);
- Y->ResizeLike(X);
+
+ auto* Y = Output(0, X.sizes(), at::dtype<float>());
const int N = X.dim32(0);
const int C = X.dim32(1);
const int G = this->group_;
template <>
bool ChannelShuffleOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
const auto& X = Input(0);
- auto* Y = Output(0);
- Y->ResizeLike(X);
+
+ auto* Y = Output(0, X.sizes(), at::dtype<float>());
const int ndim = X.ndim();
const int N = X.dim32(0);
const int C = X.dim32(ndim - 1);
template <>
bool ChannelShuffleGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNCHW() {
const auto& dY = Input(0);
- auto* dX = Output(0);
- dX->ResizeLike(dY);
+
+ auto* dX = Output(0, dY.sizes(), at::dtype<float>());
const int N = dY.dim32(0);
const int C = dY.dim32(1);
const int G = this->group_;
template <>
bool ChannelShuffleGradientOp<float, CUDAContext>::RunOnDeviceWithOrderNHWC() {
const auto& dY = Input(0);
- auto* dX = Output(0);
- dX->ResizeLike(dY);
+
+ auto* dX = Output(0, dY.sizes(), at::dtype<float>());
const int ndim = dY.ndim();
const int N = dY.dim32(0);
const int C = dY.dim32(ndim - 1);
template <>
bool ClipOp<float, CUDAContext>::RunOnDevice() {
auto& X = Input(0);
- auto* Y = Output(0);
+
CAFFE_ENFORCE_GE(X.size(), 0);
- Y->ResizeLike(X);
+ auto* Y = Output(0, X.sizes(), at::dtype<float>());
ClipKernel<<<
CAFFE_GET_BLOCKS(X.size()),
CAFFE_CUDA_NUM_THREADS,
bool ClipGradientOp<float, CUDAContext>::RunOnDevice() {
auto& Y = Input(0);
auto& dY = Input(1);
- auto* dX = Output(0);
+
CAFFE_ENFORCE_GE(Y.size(), 0);
CAFFE_ENFORCE_EQ(dY.size(), Y.size());
- dX->ResizeLike(Y);
+ auto* dX = Output(0, Y.sizes(), at::dtype<float>());
ClipGradientKernel<<<
CAFFE_GET_BLOCKS(Y.size()),
CAFFE_CUDA_NUM_THREADS,
bool CosineEmbeddingCriterionOp<CUDAContext>::RunOnDevice() {
auto& S = Input(0);
auto& Y = Input(1);
- auto* output = Output(0);
+
CAFFE_ENFORCE(S.size() == Y.size(),
"The embedding and label should have the same size.");
- output->ResizeLike(S);
+ auto* output = Output(0, S.sizes(), at::dtype<float>());
const float* Sdata = S.data<float>();
const int* Ydata = Y.data<int>();
auto& S = Input(0);
auto& Y = Input(1);
auto& dOutput = Input(2);
- auto* dS = Output(0);
- dS->ResizeLike(S);
+
+ auto* dS = Output(0, S.sizes(), at::dtype<float>());
const float* Sdata = S.data<float>();
const int* Ydata = Y.data<int>();
auto& X = Input(0);
auto& label = Input(1);
auto& dY = Input(2);
- auto* dX = Output(0);
+
int N, D;
if (X.ndim() > 1) {
N = X.dim32(0);
CAFFE_ENFORCE_EQ(label.dim32(0), N);
CAFFE_ENFORCE_EQ(dY.ndim(), 1);
CAFFE_ENFORCE_EQ(dY.dim32(0), N);
- dX->ResizeLike(X);
+ auto* dX = Output(0, X.sizes(), at::dtype<float>());
math::Set<float, CUDAContext>(
dX->size(), 0.f, dX->template mutable_data<float>(), &context_);
LabelCrossEntropyGradientKernel<<<
const auto outer_size = logits.size() / inner_size;
CAFFE_ENFORCE(g.size() == outer_size);
- auto* out = Output(0);
- out->ResizeLike(logits);
+
+ auto* out = Output(0, logits.sizes(), at::dtype<float>());
auto* out_ptr = out->template mutable_data<float>();
auto* logits_ptr = logits.data<float>();
const auto outer_size = logits.size() / inner_size;
CAFFE_ENFORCE(g.size() == outer_size);
- auto* out = Output(0);
- out->ResizeLike(logits);
+
+ auto* out = Output(0, logits.sizes(), at::dtype<float>());
auto* out_ptr = out->template mutable_data<float>();
auto* logits_ptr = logits.data<float>();
auto& offset = Input(OFFSET);
auto& filter = Input(FILTER);
auto& dY = Input(OUTPUT_GRAD);
- auto* dfilter = Output(FILTER_GRAD);
- auto* doffset = Output(OFFSET_GRAD);
+
const int N = X.dim32(0), C = X.dim32(1);
const vector<int> input_dims = this->GetDims(X);
}
CAFFE_ENFORCE(M % group_ == 0);
- dfilter->ResizeLike(filter);
- doffset->ResizeLike(offset);
+ auto* dfilter = Output(FILTER_GRAD, filter.sizes(), at::dtype<T>());
+ auto* doffset = Output(OFFSET_GRAD, offset.sizes(), at::dtype<T>());
// The dimension of each kernel
const int kernel_dim = C / group_ * kernel_dims_size;
T* dXdata = nullptr;
if (OutputSize() == 4 || (no_bias_ && (OutputSize() == 3))) {
- auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
- dX->ResizeLike(X);
+
+ auto* dX = Output(
+     no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD, X.sizes(), at::dtype<T>());
dXdata = dX->template mutable_data<T>();
math::Set<T, Context>(dX->size(), 0, dXdata, &context_);
}
auto& X = Input(0);
auto& Y = Input(1);
auto& dDistance = Input(2);
- auto* dX = Output(0);
- auto* dY = Output(1);
+
int N = X.ndim() > 0 ? X.dim32(0) : 1;
int D = N > 0 ? X.size() / N : 0;
CAFFE_ENFORCE(X.ndim() == Y.ndim());
}
CAFFE_ENFORCE_EQ(dDistance.ndim(), 1);
CAFFE_ENFORCE_EQ(dDistance.dim32(0), N);
- dX->ResizeLike(X);
- dY->ResizeLike(Y);
+ auto* dX = Output(0, X.sizes(), at::dtype<float>());
+ auto* dY = Output(1, Y.sizes(), at::dtype<float>());
math::Sub<float, CUDAContext>(
X.size(),
X.data<float>(),
auto& X = Input(0);
auto& Y = Input(1);
auto& dDistance = Input(2);
- auto* dX = Output(0);
- auto* dY = Output(1);
+
int N = X.ndim() > 0 ? X.dim32(0) : 1;
int D = N > 0 ? X.size() / N : 0;
CAFFE_ENFORCE(X.ndim() == Y.ndim());
}
CAFFE_ENFORCE_EQ(dDistance.ndim(), 1);
CAFFE_ENFORCE_EQ(dDistance.dim32(0), N);
- dX->ResizeLike(X);
- dY->ResizeLike(Y);
+ auto* dX = Output(0, X.sizes(), at::dtype<float>());
+ auto* dY = Output(1, Y.sizes(), at::dtype<float>());
L1DistanceGradientKernel<<<
CAFFE_GET_BLOCKS(N * D),
auto& X = Input(X_IN);
auto& Y = Input(Y_IN);
auto& dCos = Input(DER_COS_IN);
- auto* dX = Output(DER_X_OUT);
- auto* dY = Output(DER_Y_OUT);
+
const int N = X.ndim() > 0 ? X.dim32(0) : 1;
const int D = X.size_from_dim(1);
CAFFE_ENFORCE(X.ndim() == Y.ndim());
}
CAFFE_ENFORCE(dCos.ndim() == 1);
CAFFE_ENFORCE(dCos.dim32(0) == N);
- dX->ResizeLike(X);
- dY->ResizeLike(Y);
+ auto* dX = Output(DER_X_OUT, X.sizes(), at::dtype<float>());
+ auto* dY = Output(DER_Y_OUT, Y.sizes(), at::dtype<float>());
const auto* X_data = X.data<float>();
const auto* Y_data = Y.data<float>();
auto& X = Input(X_IN);
auto& Y = Input(Y_IN);
auto& dDot = Input(DER_DOT_IN);
- auto* dX = Output(DER_X_OUT);
- auto* dY = Output(DER_Y_OUT);
+
int N, D;
if (X.size() > 0) {
N = X.ndim() > 0 ? X.dim32(0) : 1;
}
CAFFE_ENFORCE(dDot.ndim() == 1);
CAFFE_ENFORCE(dDot.dim32(0) == N);
- dX->ResizeLike(X);
- dY->ResizeLike(Y);
+ auto* dX = Output(DER_X_OUT, X.sizes(), at::dtype<float>());
+ auto* dY = Output(DER_Y_OUT, Y.sizes(), at::dtype<float>());
DotProductGradientKernel<<<
CAFFE_GET_BLOCKS(N * D),
CAFFE_CUDA_NUM_THREADS,
const auto& X = Input(0);
const auto& a = Input(1);
const auto& b = Input(2);
- auto* Y = Output(0);
+
const auto canonical_axis = X.canonical_axis_index(axis_);
const int N = X.size_to_dim(canonical_axis);
CAFFE_ENFORCE_EQ(b.ndim(), 1, b.ndim());
CAFFE_ENFORCE_EQ(b.dim(0), D, b.ndim());
- Y->ResizeLike(X);
+ auto* Y = Output(0, X.sizes(), at::dtype<float>());
ElementwiseLinearKernel<<<
CAFFE_GET_BLOCKS(N * D),
CAFFE_ENFORCE_EQ(a.ndim(), 1, a.ndim());
CAFFE_ENFORCE_EQ(a.dim(0), D, a.ndim());
- auto* g_X = Output(0);
- auto *g_a = Output(1);
- auto *g_b = Output(2);
- g_X->ResizeLike(X);
- g_a->ResizeLike(a);
- g_b->ResizeLike(a);
+
+ auto* g_X = Output(0, X.sizes(), at::dtype<float>());
+ auto* g_a = Output(1, a.sizes(), at::dtype<float>());
+ auto* g_b = Output(2, a.sizes(), at::dtype<float>());
float* g_a_data = g_a->template mutable_data<float>();
float* g_b_data = g_b->template mutable_data<float>();
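For reference, every hunk above applies the same migration: fetching an untyped output blob and then resizing it to match a source tensor is collapsed into a single Output(index, sizes, options) call that allocates the output up front with an explicit dtype. A minimal before/after sketch of the pattern (the names X and Y are illustrative, not taken from any one operator above):

    // Before: fetch the output, then reshape it to match X. The element
    // type is only fixed later, by the first mutable_data<T>() call.
    auto* Y = Output(0);
    Y->ResizeLike(X);

    // After: a single call allocates the output with X's shape and an
    // explicit element type.
    auto* Y = Output(0, X.sizes(), at::dtype<float>());

Note that the conditional-reshape path removed in the first hunk also zero-filled the output whenever it was reallocated; the single-call form does not zero-initialize, which is what the surviving TODO about the output being set to 0 before the run refers to.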