Tensor construction codemod(raw_mutable_data) (#16373)

author Jerry Zhang <jerryzh@fb.com>

Sat, 30 Mar 2019 01:26:07 +0000 (18:26 -0700)

committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>

Sat, 30 Mar 2019 01:36:46 +0000 (18:36 -0700)
author Jerry Zhang <jerryzh@fb.com>
Sat, 30 Mar 2019 01:26:07 +0000 (18:26 -0700)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Sat, 30 Mar 2019 01:36:46 +0000 (18:36 -0700)
diff --git a/caffe2/operators/boolean_mask_ops.cc b/caffe2/operators/boolean_mask_ops.cc

index dae6427..11b3015 100644 (file)
--- a/caffe2/operators/boolean_mask_ops.cc
+++ b/caffe2/operators/boolean_mask_ops.cc
@@ -50,7 +50,6 @@ template <>
  bool BooleanMaskOp<CPUContext>::RunOnDevice() {
    auto& data = Input(0);
    auto& mask = Input(1);
-  auto* dataOut = Output(0);
    CAFFE_ENFORCE(data.dim() >= 1);
    CAFFE_ENFORCE_EQ(mask.dim(), 1);
    CAFFE_ENFORCE(data.size(0) == mask.size(0));
@@ -66,7 +65,7 @@ bool BooleanMaskOp<CPUContext>::RunOnDevice() {
    std::vector<int64_t> outShape;
    outShape.push_back(numOutputs);
    outShape.insert(outShape.end(), data.sizes().begin() + 1, data.sizes().end());
-  dataOut->Resize(outShape);
+  auto* dataOut = Output(0, outShape, at::dtype(data.dtype()));
    auto* outPtr = (char*)dataOut->raw_mutable_data(data.dtype());
  
    int64_t* out_vec = nullptr;
diff --git a/caffe2/operators/boolean_mask_ops.cu b/caffe2/operators/boolean_mask_ops.cu

index 4cbf368..d809dff 100644 (file)
--- a/caffe2/operators/boolean_mask_ops.cu
+++ b/caffe2/operators/boolean_mask_ops.cu
@@ -31,7 +31,6 @@ class BooleanMaskOp<CUDAContext> final : public Operator<CUDAContext> {
    bool RunOnDevice() override {
      const auto& src = Input(0);
      const auto& mask = Input(1);
-    auto* dest = Output(0);
  
      CAFFE_ENFORCE(src.dim() >= 1);
      CAFFE_ENFORCE_EQ(mask.dim(), 1);
@@ -80,8 +79,8 @@ class BooleanMaskOp<CUDAContext> final : public Operator<CUDAContext> {
      indices_.Resize(numOfOutput);
      std::vector<int64_t> dims = src.sizes().vec();
      dims[0] = numOfOutput;
-    dest->Resize(dims);
-    auto* destData = (uint8_t*)dest->raw_mutable_data(src.meta());
+    auto* dest = Output(0, dims, at::dtype(src.dtype()));
+    auto* destData = (uint8_t*)dest->raw_mutable_data(src.dtype());
      const auto* srcData = (uint8_t*)src.raw_data();
      if (OutputSize() == 2) {
  
diff --git a/caffe2/operators/boolean_unmask_ops.cc b/caffe2/operators/boolean_unmask_ops.cc

index 55e2449..75b33fd 100644 (file)
--- a/caffe2/operators/boolean_unmask_ops.cc
+++ b/caffe2/operators/boolean_unmask_ops.cc
@@ -8,11 +8,10 @@ template <>
  bool BooleanUnmaskOp<CPUContext>::RunOnDevice() {
    int maskSize = Input(0).numel();
    int numMasks = InputSize() / 2;
-  auto& valueMeta = Input(1).dtype();
+  auto& valueDtype = Input(1).dtype();
  
-  auto* valuesOut = Output(0);
-  valuesOut->Resize(maskSize);
-  auto* valuesOutPtr = (char*)valuesOut->raw_mutable_data(valueMeta);
+  auto* valuesOut = Output(0, maskSize, at::dtype(valueDtype));
+  auto* valuesOutPtr = (char*)valuesOut->raw_mutable_data(valueDtype);
  
    std::vector<int> nextValueIndices(numMasks, 0);
    for (int maskOffset = 0; maskOffset < maskSize; ++maskOffset) {
@@ -30,9 +29,9 @@ bool BooleanUnmaskOp<CPUContext>::RunOnDevice() {
        if (maskPtr[maskOffset]) {
          auto& valueIndex = nextValueIndices[maskIndex];
          CAFFE_ENFORCE_LT(valueIndex, values.numel());
-        auto* src = valuesPtr + (valueIndex++) * valueMeta.itemsize();
-        auto* dst = valuesOutPtr + maskOffset * valueMeta.itemsize();
-        std::copy(src, src + valueMeta.itemsize(), dst);
+        auto* src = valuesPtr + (valueIndex++) * valueDtype.itemsize();
+        auto* dst = valuesOutPtr + maskOffset * valueDtype.itemsize();
+        std::copy(src, src + valueDtype.itemsize(), dst);
          maskFound = true;
          break;
        }
diff --git a/caffe2/operators/boolean_unmask_ops.cu b/caffe2/operators/boolean_unmask_ops.cu

index ce90dab..27de5ae 100644 (file)
--- a/caffe2/operators/boolean_unmask_ops.cu
+++ b/caffe2/operators/boolean_unmask_ops.cu
@@ -54,11 +54,10 @@ class BooleanUnmaskOp<CUDAContext> final : public Operator<CUDAContext> {
    bool RunOnDevice() override {
      int maskSize = Input(0).numel();
      int numMasks = InputSize() / 2;
-    const auto& meta = Input(1).meta();
+    const auto& dtype = Input(1).dtype();
  
-    auto* out = Output(0);
-    out->Resize(maskSize);
-    auto* dest = (char*)out->raw_mutable_data(meta);
+    auto* out = Output(0, maskSize, at::dtype(dtype));
+    auto* dest = (char*)out->raw_mutable_data(dtype);
  
      ReinitializeTensor(&hostMasks_, {numMasks}, at::dtype<bool*>().device(CPU));
      auto* hostMasksData = hostMasks_.mutable_data<bool*>();
@@ -101,7 +100,7 @@ class BooleanUnmaskOp<CUDAContext> final : public Operator<CUDAContext> {
          context_.cuda_stream()>>>(
          numMasks,
          maskSize,
-        meta.itemsize(),
+        dtype.itemsize(),
          indicesData,
          values_.data<char*>(),
          valueSizesData,
diff --git a/caffe2/operators/concat_split_op.h b/caffe2/operators/concat_split_op.h

index 47ed663..4382e89 100644 (file)
--- a/caffe2/operators/concat_split_op.h
+++ b/caffe2/operators/concat_split_op.h
@@ -177,12 +177,11 @@ bool SplitOp<Context>::RunOnDevice() {
    }
    size_t input_offset = 0;
    for (int i = 0; i < OutputSize(); ++i) {
-    auto* output = Output(i);
      auto axis_dim = add_axis_ ? 1 : axis_data[i];
      if (!add_axis_) {
        output_dims[canonical_axis] = axis_data[i];
      }
-    output->Resize(output_dims);
+    auto* output = Output(i, output_dims, at::dtype(input.dtype()));
      math::CopyMatrix<Context>(
          input.itemsize(),
          before,
@@ -223,12 +222,11 @@ bool SplitByLengthsOp<Context>::RunOnDevice() {
    int after = input.size_from_dim(canonical_axis + 1);
    size_t input_offset = 0;
    for (int i = 0; i < OutputSize(); ++i) {
-    auto* output = Output(i);
      const auto* axis_offset = axis_data + length_length / OutputSize() * i;
      auto axis_dim = std::accumulate(
          axis_offset, axis_offset + length_length / OutputSize(), 0);
      output_dims[canonical_axis] = axis_dim;
-    output->Resize(output_dims);
+    auto* output = Output(i, output_dims, at::dtype(input.dtype()));
      math::CopyMatrix<Context>(
          input.itemsize(),
          before,
@@ -246,8 +244,6 @@ bool SplitByLengthsOp<Context>::RunOnDevice() {
  
  template <class Context>
  bool ConcatOp<Context>::RunOnDevice() {
-  auto* output = Output(0);
-
    // We can override default options(Context::GetDeviceType())
    // by explictly passing in device type we want
    Tensor* split = Output(
@@ -314,7 +310,7 @@ bool ConcatOp<Context>::RunOnDevice() {
    } else {
      output_dims[canonical_axis] = output_channels;
    }
-  output->Resize(output_dims);
+  auto* output = Output(0, output_dims, at::dtype(input_zero.dtype()));
    size_t output_offset = 0;
    for (int i = 0; i < InputSize(); ++i) {
      auto& input = Input(i);
diff --git a/caffe2/operators/conditional_op.cc b/caffe2/operators/conditional_op.cc

index 6097912..780efe3 100644 (file)
--- a/caffe2/operators/conditional_op.cc
+++ b/caffe2/operators/conditional_op.cc
@@ -23,9 +23,8 @@ bool ConditionalOp<CPUContext>::RunOnDevice() {
    CAFFE_ENFORCE(innerSize * dataF.dtype().itemsize() == innerSizeBytes);
  
    // initialize output shape
-  auto* dataOut = Output(0);
    const auto* condPtr = condition.template data<bool>();
-  dataOut->ResizeLike(dataT);
+  auto* dataOut = Output(0, dataT.sizes(), at::dtype(dataT.dtype()));
    auto* outPtr = (char*)dataOut->raw_mutable_data(dataT.dtype());
  
    // perform conditional op along first dimension
diff --git a/caffe2/operators/copy_op.cu b/caffe2/operators/copy_op.cu

index 2091524..bc1d734 100644 (file)
--- a/caffe2/operators/copy_op.cu
+++ b/caffe2/operators/copy_op.cu
@@ -13,14 +13,14 @@ class CopyOnDeviceLikeOp<CUDAContext, CUDAContext, CUDAContext>
  
    bool RunOnDevice() override {
      auto& input = Input(0);
-    auto* output = OperatorBase::Output<Tensor>(0, CUDA);
+    auto* output = OperatorBase::OutputTensor(
+        0, input.sizes(), at::dtype(input.dtype()).device(CUDA));
      CUDAContext context(GetGPUIDForPointer(Input(1).raw_data()));
-    output->ResizeLike(input);
      context.template CopyItems<CUDAContext, CUDAContext>(
-        input.meta(),
+        input.dtype(),
          input.numel(),
          input.raw_data(),
-        output->raw_mutable_data(input.meta()));
+        output->raw_mutable_data(input.dtype()));
      return true;
    }
  };
diff --git a/caffe2/operators/copy_op.h b/caffe2/operators/copy_op.h

index 8ccbcb9..86e6c45 100644 (file)
--- a/caffe2/operators/copy_op.h
+++ b/caffe2/operators/copy_op.h
@@ -14,9 +14,10 @@ class CopyOp : public Operator<Context> {
  
    bool RunOnDevice() override {
      auto& input = this->template Input<Tensor>(0, SrcContext::GetDeviceType());
-    auto* output =
-        this->template Output<Tensor>(0, DstContext::GetDeviceType());
-    output->ResizeLike(input);
+    auto* output = this->OutputTensor(
+        0,
+        input.sizes(),
+        at::dtype(input.dtype()).device(DstContext::GetDeviceType()));
      this->context_.template CopyItems<SrcContext, DstContext>(
          input.dtype(),
          input.numel(),
diff --git a/caffe2/operators/crf_viterbi_op.cc b/caffe2/operators/crf_viterbi_op.cc

index 07630a6..c99c632 100644 (file)
--- a/caffe2/operators/crf_viterbi_op.cc
+++ b/caffe2/operators/crf_viterbi_op.cc
@@ -94,13 +94,16 @@ class ViterbiPathOp : public Operator<CPUContext> {
      auto block_size = predictions.numel() / predictions.size(0);
      auto block_bytesize =
          predictions.size_from_dim(1) * predictions.dtype().itemsize();
-    Tensor backpointers(CPU);
-    backpointers.ResizeLike(predictions);
-
-    Tensor trellis(std::vector<int64_t>{block_size}, CPU);
-    Tensor dpMat(CPU);
-    dpMat.ResizeLike(transitions);
-    Tensor dpMax(std::vector<int64_t>{block_size}, CPU);
+    Tensor backpointers =
+        caffe2::empty(predictions.sizes(), at::dtype<int32_t>().device(CPU));
+
+    Tensor trellis = caffe2::empty(
+        std::vector<int64_t>{block_size},
+        at::dtype(predictions.dtype()).device(CPU));
+    Tensor dpMat =
+        caffe2::empty(transitions.sizes(), at::dtype<float>().device(CPU));
+    Tensor dpMax = caffe2::empty(
+        std::vector<int64_t>{block_size}, at::dtype<float>().device(CPU));
      GatherRow(predictions, 0, block_size, block_bytesize, &trellis);
      for (auto i = 1; i < seqLen; i++) {
        AddColToMat(transitions, trellis, &dpMat);
@@ -120,8 +123,10 @@ class ViterbiPathOp : public Operator<CPUContext> {
            &context_);
      }
  
-    Tensor tMax(std::vector<int64_t>{1}, CPU);
-    Tensor tArgMax(std::vector<int64_t>{1}, CPU);
+    Tensor tMax =
+        caffe2::empty(std::vector<int64_t>{1}, at::dtype<float>().device(CPU));
+    Tensor tArgMax = caffe2::empty(
+        std::vector<int64_t>{1}, at::dtype<int32_t>().device(CPU));
      ColwiseMaxAndArg(
          trellis.template data<float>(),
          1,
@@ -131,7 +136,9 @@ class ViterbiPathOp : public Operator<CPUContext> {
  
      std::vector<int32_t> viterbiVec;
      viterbiVec.push_back(tArgMax.template data<int32_t>()[0]);
-    Tensor bpEntry(std::vector<int64_t>{block_size}, CPU);
+    Tensor bpEntry = caffe2::empty(
+        std::vector<int64_t>{block_size},
+        at::dtype(backpointers.dtype()).device(CPU));
      block_bytesize =
          backpointers.size_from_dim(1) * backpointers.dtype().itemsize();
      for (auto i = seqLen - 1; i > 0; i--) {
@@ -152,14 +159,14 @@ class SwapBestPathOp : public Operator<CPUContext> {
        : Operator(std::forward<Args>(args)...) {}
    bool RunOnDevice() override {
      auto& data = Input(0);
-    auto& newBestIdicies = Input(1);
+    auto& newBestIndicies = Input(1);
  
      CAFFE_ENFORCE(
-        data.dim() == 2 && newBestIdicies.dim() == 1,
+        data.dim() == 2 && newBestIndicies.dim() == 1,
          "predictions should be a 2D matrix and  bestPath should be 1D vector");
  
      CAFFE_ENFORCE(
-        data.size(0) == newBestIdicies.size(0),
+        data.size(0) == newBestIndicies.size(0),
          "predictions and bestPath dimensions not matching");
  
      auto* updatedData = Output(0, data.sizes(), at::dtype<float>());
@@ -167,10 +174,10 @@ class SwapBestPathOp : public Operator<CPUContext> {
      context_.CopyItemsSameDevice(
          data.dtype(), data.numel(), data.template data<float>(), outData);
  
-    Tensor bestScores(CPU);
-    bestScores.ResizeLike(newBestIdicies);
-    Tensor oldBestIndices(CPU);
-    oldBestIndices.ResizeLike(newBestIdicies);
+    Tensor bestScores =
+        caffe2::empty(newBestIndicies.sizes(), at::dtype<float>().device(CPU));
+    Tensor oldBestIndices = caffe2::empty(
+        newBestIndicies.sizes(), at::dtype<int32_t>().device(CPU));
  
      ColwiseMaxAndArg(
          data.template data<float>(),
@@ -182,7 +189,7 @@ class SwapBestPathOp : public Operator<CPUContext> {
      auto block_size = data.numel() / data.size(0);
  
      const int32_t* oldBestIdx = oldBestIndices.template data<int32_t>();
-    const int32_t* newIdx = newBestIdicies.template data<int32_t>();
+    const int32_t* newIdx = newBestIndicies.template data<int32_t>();
  
      for (auto i = 0; i < data.dim32(0); i++) {
        std::swap(
diff --git a/caffe2/operators/dataset_ops.cc b/caffe2/operators/dataset_ops.cc

index 78fe003..73004d0 100644 (file)
--- a/caffe2/operators/dataset_ops.cc
+++ b/caffe2/operators/dataset_ops.cc
@@ -319,7 +319,11 @@ class PackRecordsOp : public Operator<CPUContext> {
      Output(0)->Resize(walker.size());
  
      // Output(0)->raw_mutable_data(TypeMeta::Make<SharedTensorVectorPtr>()));
-    auto* dst = Output(0)->template mutable_data<SharedTensorVectorPtr>();
+    auto* dst = Output(
+                    0,
+                    {static_cast<int64_t>(walker.size())},
+                    at::dtype<SharedTensorVectorPtr>())
+                    ->template mutable_data<SharedTensorVectorPtr>();
  
      for (int batchId = 0; batchId < walker.size(); ++batchId) {
        dst[batchId] = std::make_shared<std::vector<TensorCPU>>();
@@ -395,8 +399,8 @@ class UnPackRecordsOp : public Operator<CPUContext> {
      // Resize to the final output size
      std::vector<void*> destinations(numTensors);
      for (int i = 0; i < numTensors; ++i) {
-      Output(i)->Resize(outputDims[i]);
-      destinations[i] = Output(i)->raw_mutable_data(*metas[i]);
+      auto* output = Output(i, {outputDims[i]}, at::dtype(*metas[i]));
+      destinations[i] = output->raw_mutable_data(*metas[i]);
      }
  
      for (int i = 0; i < numRows; ++i) {
@@ -517,10 +521,9 @@ class ReadNextBatchOp : public Operator<CPUContext> {
        auto innerSize = in.size_from_dim(1);
        outDim = in.sizes().vec();
        outDim[0] = size;
-      auto* out = Output(i);
-      out->Resize(outDim);
        void* src =
            (char*)in.raw_data() + offset * innerSize * in.dtype().itemsize();
+      auto* out = Output(i, {outDim}, at::dtype(in.dtype()));
        void* dst = out->raw_mutable_data(in.dtype()); // create the tensor
        if (out->numel() == 0) {
          continue;
@@ -725,8 +728,7 @@ class ReadRandomBatchOp : public Operator<CPUContext> {
          idx++;
        }
        idx = idxbegin; // reSet
-      auto* out = Output(i);
-      out->Resize(outDim);
+      auto* out = Output(i, {outDim}, at::dtype(in.dtype()));
        if (out->numel() == 0) {
          continue;
        }
@@ -773,13 +775,13 @@ class AppendOp final : public Operator<Context> {
    bool RunOnDevice() override {
      auto& a = Input(0);
      auto& b = Input(1);
-    auto* c = Output(0);
+    auto* c = Output(0, a.sizes(), at::dtype(a.dtype()));
      CAFFE_ENFORCE(b.dim() >= 1);
      if (a.numel() == 0 && a.size(0) == 0) {
        c->CopyFrom(b);
        return true;
      }
-    CAFFE_ENFORCE(&a == c, "First argument must be in-place.");
+    CAFFE_ENFORCE(IsInputOutputAlias(0, 0), "First argument must be in-place.");
      CAFFE_ENFORCE(c->dim() == b.dim());
      CAFFE_ENFORCE(b.dim() == c->dim());
      CAFFE_ENFORCE(a.dtype() == b.dtype());
@@ -813,13 +815,14 @@ class AtomicAppendOp final : public Operator<Context> {
      for (int i = 0; i < numFields; ++i) {
        auto& a = Input(1 + i);
        auto& b = Input(1 + i + numFields);
-      auto* c = Output(i);
+      auto* c = Output(i, a.sizes(), at::dtype(a.dtype()));
        CAFFE_ENFORCE(b.dim() >= 1);
        if (a.numel() == 0) {
          continue;
        }
        CAFFE_ENFORCE(
-          (void*)&a == (void*)c, "Appended-to arguments must be in-place.");
+          IsInputOutputAlias(1 + i, i),
+          "Appended-to arguments must be in-place.");
        CAFFE_ENFORCE(c->dim() == b.dim());
        CAFFE_ENFORCE(b.dim() == c->dim());
        CAFFE_ENFORCE(a.dtype() == b.dtype());
@@ -832,7 +835,8 @@ class AtomicAppendOp final : public Operator<Context> {
      for (int i = 0; i < numFields; ++i) {
        auto& a = Input(1 + i);
        auto& b = Input(1 + i + numFields);
-      auto* c = Output(i);
+      // Can we create Tensor with numel() == 0?
+      auto* c = Output(i, a.sizes(), at::dtype(a.dtype()));
        if (a.numel() == 0 && a.size(0) == 0) {
          c->CopyFrom(b);
          continue;
@@ -892,7 +896,6 @@ class ConcatTensorVectorOp final : public Operator<Context> {
      const TensorVectorPtr& tensorVector =
          OperatorBase::Input<TensorVectorPtr>(TENSOR_VECTOR);
  
-    auto* tensor = Output(TENSOR);
      CAFFE_ENFORCE(!tensorVector->empty());
  
      vector<int64_t> outputDims(tensorVector->at(0).sizes().vec());
@@ -906,7 +909,8 @@ class ConcatTensorVectorOp final : public Operator<Context> {
        outputDims[0] += tensorVector->at(i).sizes()[0];
      }
  
-    tensor->Resize(outputDims);
+    auto* tensor =
+        Output(TENSOR, outputDims, at::dtype(tensorVector->at(0).dtype()));
      int64_t offset = 0;
      auto* dst = (char*)tensor->raw_mutable_data(tensorVector->at(0).dtype());
  
@@ -1021,6 +1025,8 @@ class TrimDatasetOp : public Operator<CPUContext> {
      // trim each column to the offset
      for (int col = 0; col < walker.fields().size(); ++col) {
        auto newOuterSize = walker.fields().at(col).offset();
+      // TODO: Remove this call to Output(col), since it
+      // returns a partially initialized Tensor.
        Output(col)->ShrinkTo(newOuterSize);
      }
      return true;
diff --git a/caffe2/operators/ensure_cpu_output_op.h b/caffe2/operators/ensure_cpu_output_op.h

index 04255ed..6d06e1f 100644 (file)
--- a/caffe2/operators/ensure_cpu_output_op.h
+++ b/caffe2/operators/ensure_cpu_output_op.h
@@ -33,9 +33,10 @@ class EnsureCPUOutputOp : public Operator<Context> {
    template <class InputContext>
    bool CopyWithContext() {
      // Output is always on CPU
-    auto* output = this->template Output<Tensor>(0, CPU);
      auto& input = this->template Input<Tensor>(0, InputContext::GetDeviceType());
-    output->ResizeLike(input);
+    // TODO: Is it possible to use OutputTensorCopyFrom here instead?
+    auto* output = this->OutputTensor(
+        0, input.sizes(), at::dtype(input.dtype()).device(CPU));
      context_.CopyItemsToCPU(
          input.dtype(),
          input.numel(),
diff --git a/caffe2/operators/flatten_op.h b/caffe2/operators/flatten_op.h

index 401e6fb..f840347 100644 (file)
--- a/caffe2/operators/flatten_op.h
+++ b/caffe2/operators/flatten_op.h
@@ -17,10 +17,12 @@ class FlattenOp : public Operator<Context> {
  
    bool RunOnDevice() override {
      auto& input = Input(0);
-    auto* output = Output(0);
      CAFFE_ENFORCE_GE(
          input.dim(), axis_, "The rank of the tensor must be >= axis.");
-    output->Resize(input.size_to_dim(axis_), input.size_from_dim(axis_));
+    auto* output = Output(
+        0,
+        {input.size_to_dim(axis_), input.size_from_dim(axis_)},
+        at::dtype(input.dtype()));
      context_.CopyItemsSameDevice(
          input.dtype(),
          input.numel(),
diff --git a/caffe2/operators/gather_ranges_to_dense_op.h b/caffe2/operators/gather_ranges_to_dense_op.h

index ee8c09a..60947c7 100644 (file)
--- a/caffe2/operators/gather_ranges_to_dense_op.h
+++ b/caffe2/operators/gather_ranges_to_dense_op.h
@@ -66,9 +66,8 @@ class GatherRangesToDenseOp final : public Operator<Context> {
      vector<int64_t> outputDims{batchSize, 0};
      vector<char*> outputRawData;
      for (int i = 0; i < OutputSize(); ++i) {
-      auto* output = Output(i);
        outputDims[1] = lengths_[i];
-      output->Resize(outputDims);
+      auto* output = Output(i, outputDims, at::dtype(data.dtype()));
        char* ptr = static_cast<char*>(output->raw_mutable_data(data.dtype()));
        memset(ptr, 0, output->nbytes());
        outputRawData.push_back(ptr);
diff --git a/caffe2/operators/lengths_tile_op.cc b/caffe2/operators/lengths_tile_op.cc

index 5e3f167..10178e7 100644 (file)
--- a/caffe2/operators/lengths_tile_op.cc
+++ b/caffe2/operators/lengths_tile_op.cc
@@ -6,7 +6,6 @@ template <>
  bool LengthsTileOp<CPUContext>::RunOnDevice() {
    auto& data = Input(DATA);
    auto& lengths = Input(LENGTHS);
-  auto* output = Output(0);
  
    CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTHS must be 1-D");
    CAFFE_ENFORCE_GE(data.dim(), 1, "DATA should be at least 1-D");
@@ -26,7 +25,7 @@ bool LengthsTileOp<CPUContext>::RunOnDevice() {
  
    auto shape = data.sizes().vec();
    shape[0] = total_length;
-  output->Resize(shape);
+  auto* output = Output(0, shape, at::dtype(data.dtype()));
  
    auto block_bytesize = data.size_from_dim(1) * data.dtype().itemsize();
    auto src = static_cast<const char*>(data.raw_data());
diff --git a/caffe2/operators/pack_segments.cc b/caffe2/operators/pack_segments.cc

index 8e82ed0..ac52c1b 100644 (file)
--- a/caffe2/operators/pack_segments.cc
+++ b/caffe2/operators/pack_segments.cc
@@ -116,7 +116,6 @@ template <typename T, typename Data_T>
  bool UnpackSegmentsOp<CPUContext>::DoRunWithType2() {
    const auto& data = Input(DATA);
    const auto& lengths = Input(LENGTHS);
-  auto* output = Output(0);
  
    CAFFE_ENFORCE_GE(data.dim(), 2, "DATA should be at least 2-D");
    CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTH should be 1-D");
@@ -135,7 +134,7 @@ bool UnpackSegmentsOp<CPUContext>::DoRunWithType2() {
        shape[0], lengths.size(0), "LENGTH should match DATA in dimension 0");
    shape.erase(shape.begin());
    shape[0] = total_l;
-  output->Resize(shape);
+  auto* output = Output(0, shape, at::dtype(data.dtype()));
    // create output tensor
    auto* out = static_cast<char*>(output->raw_mutable_data(data.dtype()));
    if (!(data.size(0) && data.size(1))) {
diff --git a/caffe2/operators/pack_segments.cu b/caffe2/operators/pack_segments.cu

index 213bb60..3e6a2b6 100644 (file)
--- a/caffe2/operators/pack_segments.cu
+++ b/caffe2/operators/pack_segments.cu
@@ -179,11 +179,6 @@ bool PackSegmentsOp<CUDAContext>::DoRunWithType2() {
    int64_t num_seq = lengths.dim(0);
    const Data_T* data_ptr = data.data<Data_T>();
    const T* lengths_ptr = lengths.data<T>();
-  auto* out = Output(0);
-  Tensor* presence_mask = nullptr;
-  if (return_presence_mask_) {
-    presence_mask = Output(1);
-  }
  
    CAFFE_ENFORCE_GE(data.dim(), 1, "DATA should be at least 1-D");
    CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTH should be 1-D");
@@ -214,7 +209,7 @@ bool PackSegmentsOp<CUDAContext>::DoRunWithType2() {
    bool* presence_mask_data = nullptr;
    if (return_presence_mask_) {
      std::vector<int64_t> presence_shape{lengths.numel(), max_length};
-    presence_mask->Resize(presence_shape);
+    auto* presence_mask = Output(1, presence_shape, at::dtype<bool>());
      presence_mask_data = presence_mask->template mutable_data<bool>();
    }
  
@@ -222,8 +217,8 @@ bool PackSegmentsOp<CUDAContext>::DoRunWithType2() {
    auto shape = data.sizes().vec(); // Shape of out is batch_size x max_len x ...
    shape[0] = max_length;
    shape.insert(shape.begin(), lengths.numel());
-  out->Resize(shape);
-  Data_T* out_ptr = static_cast<Data_T*>(out->raw_mutable_data(data.meta()));
+  auto* out = Output(0, shape, at::dtype(data.dtype()));
+  Data_T* out_ptr = static_cast<Data_T*>(out->raw_mutable_data(data.dtype()));
  
    // Return empty out (with the proper shape) if first dim is 0.
    if (!data.dim(0)) {
@@ -265,7 +260,6 @@ bool UnpackSegmentsOp<CUDAContext>::DoRunWithType2() {
    int64_t num_seq = lengths.dim(0);
    const Data_T* data_ptr = data.data<Data_T>();
    const T* lengths_ptr = lengths.data<T>();
-  auto* out = Output(0);
  
    CAFFE_ENFORCE_GE(data.dim(), 1, "DATA should be at least 1-D");
    CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTH should be 1-D");
@@ -315,8 +309,8 @@ bool UnpackSegmentsOp<CUDAContext>::DoRunWithType2() {
        shape[0], lengths.dim(0), "LENGTH should match DATA in dimension 0");
    shape.erase(shape.begin());
    shape[0] = num_cell;
-  out->Resize(shape);
-  Data_T* out_ptr = static_cast<Data_T*>(out->raw_mutable_data(data.meta()));
+  auto* out = Output(0, shape, at::dtype(data.dtype()));
+  Data_T* out_ptr = static_cast<Data_T*>(out->raw_mutable_data(data.dtype()));
  
    // Return empty out (with the proper shape) if any of the dimensions is 0.
    if (data.dim(0) == 0 || data.dim(1) == 0) {
diff --git a/caffe2/operators/partition_ops.h b/caffe2/operators/partition_ops.h

index fa8a27c..f5f7dc9 100644 (file)
--- a/caffe2/operators/partition_ops.h
+++ b/caffe2/operators/partition_ops.h
@@ -60,8 +60,7 @@ class GatherByKeyOp : public Operator<CPUContext> {
      }
      CAFFE_ENFORCE_EQ(keysTensor.numel(), totalSize);
  
-    auto* outTensor = Output(0);
-    outTensor->Resize(outShape);
+    auto* outTensor = Output(0, outShape, at::dtype(meta));
      auto* outData = static_cast<char*>(outTensor->raw_mutable_data(meta));
      const auto blockSize = outTensor->size_from_dim(1);
  
@@ -164,9 +163,8 @@ class PartitionOpBase : public Operator<CPUContext> {
            input.sizes().begin() + main_input.dim() - 1, input.sizes().end());
        for (int j = 0; j < partitions; ++j) {
          int out_idx = i + j * inputSize;
-        auto output = Output(out_idx);
          shape[0] = counts_[j];
-        output->Resize(shape);
+        auto output = Output(out_idx, shape, at::dtype(input.dtype()));
          out_datas_[out_idx] = output->raw_mutable_data(input.dtype());
        }
      }
@@ -256,13 +254,12 @@ class LengthsPartitionOp : public PartitionOpBase {
        // Specialization when partitions == 1 which just becomes a copy.
        for (int i = 0; i < InputSize(); ++i) {
          auto& input = Input(i);
-        auto& output = *Output(i);
-        output.ResizeLike(input);
+        auto* output = Output(i, input.sizes(), at::dtype(input.dtype()));
          context_.CopyItemsSameDevice(
              input.dtype(),
              input.numel(),
              input.raw_data(),
-            output.raw_mutable_data(input.dtype()));
+            output->raw_mutable_data(input.dtype()));
        }
        return true;
      }
@@ -280,9 +277,8 @@ class LengthsPartitionOp : public PartitionOpBase {
      const int32_t* lengths_data = length_input.template data<int32_t>();
      out_length_.resize(partitions);
      for (int i = 0; i < partitions; ++i) {
-      auto& output = *Output(i * InputSize());
-      output.Resize(elements);
-      out_length_[i] = output.template mutable_data<int32_t>();
+      auto* output = Output(i * InputSize(), elements, at::dtype<int32_t>());
+      out_length_[i] = output->template mutable_data<int32_t>();
      }
  
      int total_length = 0;
diff --git a/caffe2/operators/prepend_dim_op.h b/caffe2/operators/prepend_dim_op.h

index 2df840d..5cc8d64 100644 (file)
--- a/caffe2/operators/prepend_dim_op.h
+++ b/caffe2/operators/prepend_dim_op.h
@@ -23,7 +23,6 @@ class PrependDimOp : public Operator<Context> {
  
    bool RunOnDevice() override {
      auto& input = Input(0);
-    auto* output = Output(0);
  
      CAFFE_ENFORCE(input.dim() > 0, "Input must be at least 1D.");
      CAFFE_ENFORCE(
@@ -37,9 +36,9 @@ class PrependDimOp : public Operator<Context> {
      for (int i = 1; i < input.sizes().size(); ++i) {
        actual_new_shape[i + 1] = input.size(i);
      }
-    output->Resize(actual_new_shape);
+    auto* output = Output(0, actual_new_shape, at::dtype(input.dtype()));
  
-    if (output != &input) {
+    if (!IsInputOutputAlias(0, 0)) {
        // If we are not doing in-place computation, a copy is needed.
        context_.CopyItemsSameDevice(
            input.dtype(),
@@ -64,7 +63,6 @@ class MergeDimOp : public Operator<Context> {
  
    bool RunOnDevice() override {
      auto& input = Input(0);
-    auto* output = Output(0);
  
      CAFFE_ENFORCE(input.dim() > 1, "Input must be at least 2D.");
  
@@ -73,9 +71,9 @@ class MergeDimOp : public Operator<Context> {
      for (int i = 1; i < input.sizes().size() - 1; ++i) {
        actual_new_shape[i] = input.size(i + 1);
      }
-    output->Resize(actual_new_shape);
+    auto* output = Output(0, actual_new_shape, at::dtype(input.dtype()));
  
-    if (output != &input) {
+    if (!IsInputOutputAlias(0, 0)) {
        // If we are not doing in-place computation, a copy is needed.
        context_.CopyItemsSameDevice(
            input.dtype(),
diff --git a/caffe2/operators/remove_data_blocks_op.h b/caffe2/operators/remove_data_blocks_op.h

index 5f409bf..9303904 100644 (file)
--- a/caffe2/operators/remove_data_blocks_op.h
+++ b/caffe2/operators/remove_data_blocks_op.h
@@ -52,10 +52,9 @@ class RemoveDataBlocksOp final : public Operator<Context> {
      ind_vec.erase(std::unique(ind_vec.begin(), ind_vec.end()), ind_vec.end());
      indices_size = ind_vec.size();
  
-    auto* output = Output(0);
      auto shape = data.sizes().vec();
      shape[0] -= indices_size;
-    output->Resize(shape);
+    auto* output = Output(0, shape, at::dtype(data.dtype()));
      char* out_ptr = (char*)output->raw_mutable_data(data.dtype());
  
      ind_vec.insert(ind_vec.begin(), -1);
diff --git a/caffe2/operators/reservoir_sampling.cc b/caffe2/operators/reservoir_sampling.cc

index 0e125dd..206ec90 100644 (file)
--- a/caffe2/operators/reservoir_sampling.cc
+++ b/caffe2/operators/reservoir_sampling.cc
@@ -23,6 +23,7 @@ class ReservoirSamplingOp final : public Operator<Context> {
      auto& mutex = OperatorBase::Input<std::unique_ptr<std::mutex>>(MUTEX);
      std::lock_guard<std::mutex> guard(*mutex);
  
+    // TODO: Handle this Output(RESERVOIR) call in a separate diff.
      auto* output = Output(RESERVOIR);
      const auto& input = Input(DATA);
  
diff --git a/caffe2/operators/reshape_op.h b/caffe2/operators/reshape_op.h

index c7f4b16..0ad70e7 100644 (file)
--- a/caffe2/operators/reshape_op.h
+++ b/caffe2/operators/reshape_op.h
@@ -30,7 +30,8 @@ class ReshapeOp : public Operator<Context> {
  
    template <typename T>
    bool DoRunWithType() {
-    DoRunWithTypeImpl<T>(Input(0), Output(0));
+    DoRunWithTypeImpl<T>(
+        Input(0), Output(0, Input(0).sizes(), Input(0).dtype()));
      return true;
    }
  
@@ -123,7 +124,7 @@ class ReshapeOp : public Operator<Context> {
      }
  
      output->Resize(actual_new_shape);
-    if (output != &input) {
+    if (!IsInputOutputAlias(0, 0)) {
        // If we are not doing in-place computation, a copy is needed.
        context_.CopyItemsSameDevice(
            input.dtype(),
diff --git a/caffe2/operators/sequence_ops.cc b/caffe2/operators/sequence_ops.cc

index dfb01ad..b1e2a8a 100644 (file)
--- a/caffe2/operators/sequence_ops.cc
+++ b/caffe2/operators/sequence_ops.cc
@@ -192,16 +192,15 @@ bool PadEmptySamplesOp<CPUContext>::RunOnDevice() {
          features.size(0) == sumLen, "FEATURE and LENGTH should be consistent");
      const auto block_size = features.size_from_dim(1);
  
-    auto* out_features = Output(1 + k);
      auto outDim = features.sizes().vec();
      outDim.at(0) += needPadding;
-    out_features->Resize(outDim);
+    auto* out_features = Output(1 + k, outDim, at::dtype(features.dtype()));
      auto dst =
          static_cast<char*>(out_features->raw_mutable_data(features.dtype()));
      auto src_base = static_cast<const char*>(features.raw_data());
      // copy data and add padding index as zero
-    Tensor zero{CPU};
-    zero.Resize(block_size);
+    Tensor zero =
+        caffe2::empty({block_size}, at::dtype(features.dtype()).device(CPU));
      auto zeroPtr = static_cast<char*>(zero.raw_mutable_data(features.dtype()));
      memset(zeroPtr, 0, zero.nbytes());
      int start_dest = 0;
diff --git a/caffe2/operators/text_file_reader.cc b/caffe2/operators/text_file_reader.cc

index 5a3e80d..4afd689 100644 (file)
--- a/caffe2/operators/text_file_reader.cc
+++ b/caffe2/operators/text_file_reader.cc
@@ -110,8 +110,8 @@ class TextFileReaderReadOp : public Operator<CPUContext> {
      // it.
      std::vector<char*> datas(numFields);
      for (int i = 0; i < numFields; ++i) {
-      Output(i)->Resize(batchSize_);
-      datas[i] = (char*)Output(i)->raw_mutable_data(instance->fieldMetas[i]);
+      auto* output = Output(i, batchSize_, at::dtype(instance->fieldMetas[i]));
+      datas[i] = (char*)output->raw_mutable_data(instance->fieldMetas[i]);
      }
  
      int rowsRead = 0;
diff --git a/caffe2/operators/tile_op.h b/caffe2/operators/tile_op.h

index ad0b924..72cd56d 100644 (file)
--- a/caffe2/operators/tile_op.h
+++ b/caffe2/operators/tile_op.h
@@ -74,13 +74,12 @@ class TileOp final : public Operator<Context> {
      }
  
      const auto& X = Input(0);
-    auto* Y = Output(0);
      const int axis = X.canonical_axis_index(axis_);
  
      // reshape output to be input tiled along the axis
      std::vector<std::int64_t> Y_dims = X.sizes().vec();
      Y_dims[axis] *= tiles_;
-    Y->Resize(Y_dims);
+    auto* Y = Output(0, Y_dims, at::dtype<T>());
  
      // size up to (and not including) axis
      const int outer_size = X.size_to_dim(axis);
@@ -179,14 +178,13 @@ class TileGradientOp final : public Operator<Context> {
      }
  
      const auto& dY = Input(0);
-    auto* dX = Output(0);
      const int axis = dY.canonical_axis_index(axis_);
  
      // reshape output to be input "untiled" along the axis
      std::vector<std::int64_t> X_dims = dY.sizes().vec();
      CAFFE_ENFORCE_EQ(X_dims[axis] % tiles_, 0);
      X_dims[axis] /= tiles_;
-    dX->Resize(X_dims);
+    auto* dX = Output(0, X_dims, at::dtype<T>());
  
      // size up to (and not including) axis
      const int outer_size = dX->size_to_dim(axis);
diff --git a/caffe2/operators/utility_ops.h b/caffe2/operators/utility_ops.h

index 2b38d1b..af79f44 100644 (file)
--- a/caffe2/operators/utility_ops.h
+++ b/caffe2/operators/utility_ops.h
@@ -235,10 +235,9 @@ class FlattenToVecOp : public Operator<Context> {
  
    bool RunOnDevice() override {
      auto& input = Input(0);
-    auto* output = Output(0);
      CAFFE_ENFORCE_GE(
          input.dim(), 1, "The rank of the tensor must be >= 1.");
-    output->Resize(input.numel());
+    auto* output = Output(0, {input.numel()}, at::dtype(input.dtype()));
  
      context_.CopyItemsSameDevice(
          input.dtype(),
@@ -259,9 +258,8 @@ class ResizeLikeOp : public Operator<Context> {
    bool RunOnDevice() override {
      auto& input0 = Input(0);
      auto& input1 = Input(1);
-    auto* output = Output(0);
      CAFFE_ENFORCE_EQ(input0.numel(), input1.numel());
-    output->ResizeLike(Input(1));
+    auto* output = Output(0, input1.sizes(), at::dtype(input0.dtype()));
      context_.CopyItemsSameDevice(
          input0.dtype(),
          input0.numel(),
@@ -1050,8 +1048,6 @@ class GatherRangesOp : public Operator<Context> {
    bool DoRunWithType() {
      auto& data = Input(DATA);
      auto& ranges = Input(RANGES);
-    auto* outputData = Output(0);
-    auto* outputLengths = Output(1);
  
      auto batchSize = ranges.size(0);
      CAFFE_ENFORCE(data.dim() == 1, "Data has to be 1-D");
@@ -1063,7 +1059,7 @@ class GatherRangesOp : public Operator<Context> {
      auto* rawData = static_cast<const char*>(data.raw_data());
      auto* rangesData = ranges.template data<Index>();
  
-    outputLengths->Resize(batchSize);
+    auto* outputLengths = Output(1, {batchSize}, at::dtype<int32_t>());
      auto* outputLengthsPtr = outputLengths->template mutable_data<int32_t>();
      size_t start = 0;
      size_t blockSize = ranges.size_from_dim(1);
@@ -1074,7 +1070,8 @@ class GatherRangesOp : public Operator<Context> {
      }
  
      size_t outputSize = accumulate(rangesData, 0, ranges.numel());
-    outputData->Resize(outputSize);
+    auto* outputData =
+        Output(0, {static_cast<int64_t>(outputSize)}, at::dtype(data.dtype()));
  
      auto outputRawData =
          static_cast<char*>(outputData->raw_mutable_data(data.dtype()));
@@ -1130,7 +1127,6 @@ class LengthsGatherOp : public Operator<Context> {
      auto& items = Input(ITEMS);
      auto& lengths = Input(LENGTHS);
      auto& indices = Input(INDICES);
-    auto* output = Output(0);
  
      CAFFE_ENFORCE_GE(items.dim(), 1, "ITEMS should be at least 1-D");
      CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTHS should be 1-D");
@@ -1147,7 +1143,7 @@ class LengthsGatherOp : public Operator<Context> {
      }
      auto shape = items.sizes().vec();
      shape[0] = total_length;
-    output->Resize(shape);
+    auto* output = Output(0, {shape}, at::dtype(items.dtype()));
  
      offsets_.clear();
      int64_t running_offset = 0;
diff --git a/caffe2/quantization/server/fully_connected_dnnlowp_op.cc b/caffe2/quantization/server/fully_connected_dnnlowp_op.cc

index 5eee0a9..f697429 100644 (file)
--- a/caffe2/quantization/server/fully_connected_dnnlowp_op.cc
+++ b/caffe2/quantization/server/fully_connected_dnnlowp_op.cc
@@ -83,8 +83,7 @@ bool FullyConnectedDNNLowPOp<T>::RunOnDevice() {
      }
  
      auto* Y_ref = fp32_op->Output(0);
-    auto* Y = OutputTensorCPU_(0);
-    Y->ResizeLike(*Y_ref);
+    auto* Y = OutputTensorCPU_(0, Y_ref->sizes(), at::dtype(Y_ref->dtype()));
      fp32_op->context_.CopyItemsSameDevice(
          Y_ref->dtype(),
          Y_ref->size(),
diff --git a/caffe2/queue/rebatching_queue.cc b/caffe2/queue/rebatching_queue.cc

index 4cd54e0..e4015b9 100644 (file)
--- a/caffe2/queue/rebatching_queue.cc
+++ b/caffe2/queue/rebatching_queue.cc
@@ -84,7 +84,8 @@ std::vector<std::vector<TensorCPU>> split(
      CAFFE_ENFORCE_EQ(input.sizes().at(0), outputSize);
  
      for (int i = 0; i < outputSize; ++i) {
-      outputs[i].push_back(Tensor(outputDims, CPU));
+      outputs[i].push_back(
+          caffe2::empty(outputDims, at::dtype(input.dtype()).device(CPU)));
        context.CopyItemsToCPU(
            input.dtype(),
            innerSize,
author	Jerry Zhang <jerryzh@fb.com>
	Sat, 30 Mar 2019 01:26:07 +0000 (18:26 -0700)
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
	Sat, 30 Mar 2019 01:36:46 +0000 (18:36 -0700)
caffe2/operators/boolean_mask_ops.cc		patch \| blob \| history
caffe2/operators/boolean_mask_ops.cu		patch \| blob \| history
caffe2/operators/boolean_unmask_ops.cc		patch \| blob \| history
caffe2/operators/boolean_unmask_ops.cu		patch \| blob \| history
caffe2/operators/concat_split_op.h		patch \| blob \| history
caffe2/operators/conditional_op.cc		patch \| blob \| history
caffe2/operators/copy_op.cu		patch \| blob \| history
caffe2/operators/copy_op.h		patch \| blob \| history
caffe2/operators/crf_viterbi_op.cc		patch \| blob \| history
caffe2/operators/dataset_ops.cc		patch \| blob \| history
caffe2/operators/ensure_cpu_output_op.h		patch \| blob \| history
caffe2/operators/flatten_op.h		patch \| blob \| history
caffe2/operators/gather_ranges_to_dense_op.h		patch \| blob \| history
caffe2/operators/lengths_tile_op.cc		patch \| blob \| history
caffe2/operators/pack_segments.cc		patch \| blob \| history
caffe2/operators/pack_segments.cu		patch \| blob \| history
caffe2/operators/partition_ops.h		patch \| blob \| history
caffe2/operators/prepend_dim_op.h		patch \| blob \| history
caffe2/operators/remove_data_blocks_op.h		patch \| blob \| history
caffe2/operators/reservoir_sampling.cc		patch \| blob \| history
caffe2/operators/reshape_op.h		patch \| blob \| history
caffe2/operators/sequence_ops.cc		patch \| blob \| history
caffe2/operators/text_file_reader.cc		patch \| blob \| history
caffe2/operators/tile_op.h		patch \| blob \| history
caffe2/operators/utility_ops.h		patch \| blob \| history
caffe2/quantization/server/fully_connected_dnnlowp_op.cc		patch \| blob \| history
caffe2/queue/rebatching_queue.cc		patch \| blob \| history