bool BooleanMaskOp<CPUContext>::RunOnDevice() {
auto& data = Input(0);
auto& mask = Input(1);
+ auto* dataOut = Output(0);
CAFFE_ENFORCE(data.dim() >= 1);
CAFFE_ENFORCE_EQ(mask.dim(), 1);
CAFFE_ENFORCE(data.size(0) == mask.size(0));
std::vector<int64_t> outShape;
outShape.push_back(numOutputs);
outShape.insert(outShape.end(), data.sizes().begin() + 1, data.sizes().end());
- auto* dataOut = Output(0, outShape, at::dtype(data.dtype()));
+ dataOut->Resize(outShape);
auto* outPtr = (char*)dataOut->raw_mutable_data(data.dtype());
int64_t* out_vec = nullptr;
bool RunOnDevice() override {
const auto& src = Input(0);
const auto& mask = Input(1);
+ auto* dest = Output(0);
CAFFE_ENFORCE(src.dim() >= 1);
CAFFE_ENFORCE_EQ(mask.dim(), 1);
indices_.Resize(numOfOutput);
std::vector<int64_t> dims = src.sizes().vec();
dims[0] = numOfOutput;
- auto* dest = Output(0, dims, at::dtype(src.dtype()));
- auto* destData = (uint8_t*)dest->raw_mutable_data(src.dtype());
+ dest->Resize(dims);
+ auto* destData = (uint8_t*)dest->raw_mutable_data(src.meta());
const auto* srcData = (uint8_t*)src.raw_data();
if (OutputSize() == 2) {
bool BooleanUnmaskOp<CPUContext>::RunOnDevice() {
int maskSize = Input(0).numel();
int numMasks = InputSize() / 2;
- auto& valueDtype = Input(1).dtype();
+ auto& valueMeta = Input(1).dtype();
- auto* valuesOut = Output(0, maskSize, at::dtype(valueDtype));
- auto* valuesOutPtr = (char*)valuesOut->raw_mutable_data(valueDtype);
+ auto* valuesOut = Output(0);
+ valuesOut->Resize(maskSize);
+ auto* valuesOutPtr = (char*)valuesOut->raw_mutable_data(valueMeta);
std::vector<int> nextValueIndices(numMasks, 0);
for (int maskOffset = 0; maskOffset < maskSize; ++maskOffset) {
if (maskPtr[maskOffset]) {
auto& valueIndex = nextValueIndices[maskIndex];
CAFFE_ENFORCE_LT(valueIndex, values.numel());
- auto* src = valuesPtr + (valueIndex++) * valueDtype.itemsize();
- auto* dst = valuesOutPtr + maskOffset * valueDtype.itemsize();
- std::copy(src, src + valueDtype.itemsize(), dst);
+ auto* src = valuesPtr + (valueIndex++) * valueMeta.itemsize();
+ auto* dst = valuesOutPtr + maskOffset * valueMeta.itemsize();
+ std::copy(src, src + valueMeta.itemsize(), dst);
maskFound = true;
break;
}
bool RunOnDevice() override {
int maskSize = Input(0).numel();
int numMasks = InputSize() / 2;
- const auto& dtype = Input(1).dtype();
+ const auto& meta = Input(1).meta();
- auto* out = Output(0, maskSize, at::dtype(dtype));
- auto* dest = (char*)out->raw_mutable_data(dtype);
+ auto* out = Output(0);
+ out->Resize(maskSize);
+ auto* dest = (char*)out->raw_mutable_data(meta);
ReinitializeTensor(&hostMasks_, {numMasks}, at::dtype<bool*>().device(CPU));
auto* hostMasksData = hostMasks_.mutable_data<bool*>();
context_.cuda_stream()>>>(
numMasks,
maskSize,
- dtype.itemsize(),
+ meta.itemsize(),
indicesData,
values_.data<char*>(),
valueSizesData,
}
size_t input_offset = 0;
for (int i = 0; i < OutputSize(); ++i) {
+ auto* output = Output(i);
auto axis_dim = add_axis_ ? 1 : axis_data[i];
if (!add_axis_) {
output_dims[canonical_axis] = axis_data[i];
}
- auto* output = Output(i, output_dims, at::dtype(input.dtype()));
+ output->Resize(output_dims);
math::CopyMatrix<Context>(
input.itemsize(),
before,
int after = input.size_from_dim(canonical_axis + 1);
size_t input_offset = 0;
for (int i = 0; i < OutputSize(); ++i) {
+ auto* output = Output(i);
const auto* axis_offset = axis_data + length_length / OutputSize() * i;
auto axis_dim = std::accumulate(
axis_offset, axis_offset + length_length / OutputSize(), 0);
output_dims[canonical_axis] = axis_dim;
- auto* output = Output(i, output_dims, at::dtype(input.dtype()));
+ output->Resize(output_dims);
math::CopyMatrix<Context>(
input.itemsize(),
before,
template <class Context>
bool ConcatOp<Context>::RunOnDevice() {
+ auto* output = Output(0);
+
// We can override default options(Context::GetDeviceType())
// by explictly passing in device type we want
Tensor* split = Output(
} else {
output_dims[canonical_axis] = output_channels;
}
- auto* output = Output(0, output_dims, at::dtype(input_zero.dtype()));
+ output->Resize(output_dims);
size_t output_offset = 0;
for (int i = 0; i < InputSize(); ++i) {
auto& input = Input(i);
CAFFE_ENFORCE(innerSize * dataF.dtype().itemsize() == innerSizeBytes);
// initialize output shape
+ auto* dataOut = Output(0);
const auto* condPtr = condition.template data<bool>();
- auto* dataOut = Output(0, dataT.sizes(), at::dtype(dataT.dtype()));
+ dataOut->ResizeLike(dataT);
auto* outPtr = (char*)dataOut->raw_mutable_data(dataT.dtype());
// perform conditional op along first dimension
// Copies Input(0) into output 0, which both variants below request as a CUDA
// tensor shaped like the input. Always returns true.
// NOTE(review): lines prefixed with '-' / '+' look like the removed / added
// sides of a patch excerpt rather than compilable code; they are left as-is.
bool RunOnDevice() override {
auto& input = Input(0);
// '-' form: the output is created with the input's sizes and dtype in one call.
- auto* output = OperatorBase::OutputTensor(
- 0, input.sizes(), at::dtype(input.dtype()).device(CUDA));
// '+' form: the output is fetched first and sized later via ResizeLike(input).
+ auto* output = OperatorBase::Output<Tensor>(0, CUDA);
// The context is built from the GPU id reported for Input(1)'s data pointer;
// presumably this selects the destination GPU for the copy — TODO confirm.
CUDAContext context(GetGPUIDForPointer(Input(1).raw_data()));
+ output->ResizeLike(input);
// CopyItems is instantiated with CUDAContext for both template arguments and
// transfers input.numel() items from the input buffer to the output buffer.
context.template CopyItems<CUDAContext, CUDAContext>(
- input.dtype(),
+ input.meta(),
input.numel(),
input.raw_data(),
- output->raw_mutable_data(input.dtype()));
+ output->raw_mutable_data(input.meta()));
return true;
}
};
bool RunOnDevice() override {
auto& input = this->template Input<Tensor>(0, SrcContext::GetDeviceType());
- auto* output = this->OutputTensor(
- 0,
- input.sizes(),
- at::dtype(input.dtype()).device(DstContext::GetDeviceType()));
+ auto* output =
+ this->template Output<Tensor>(0, DstContext::GetDeviceType());
+ output->ResizeLike(input);
this->context_.template CopyItems<SrcContext, DstContext>(
input.dtype(),
input.numel(),
auto block_size = predictions.numel() / predictions.size(0);
auto block_bytesize =
predictions.size_from_dim(1) * predictions.dtype().itemsize();
- Tensor backpointers =
- caffe2::empty(predictions.sizes(), at::dtype<int32_t>().device(CPU));
-
- Tensor trellis = caffe2::empty(
- std::vector<int64_t>{block_size},
- at::dtype(predictions.dtype()).device(CPU));
- Tensor dpMat =
- caffe2::empty(transitions.sizes(), at::dtype<float>().device(CPU));
- Tensor dpMax = caffe2::empty(
- std::vector<int64_t>{block_size}, at::dtype<float>().device(CPU));
+ Tensor backpointers(CPU);
+ backpointers.ResizeLike(predictions);
+
+ Tensor trellis(std::vector<int64_t>{block_size}, CPU);
+ Tensor dpMat(CPU);
+ dpMat.ResizeLike(transitions);
+ Tensor dpMax(std::vector<int64_t>{block_size}, CPU);
GatherRow(predictions, 0, block_size, block_bytesize, &trellis);
for (auto i = 1; i < seqLen; i++) {
AddColToMat(transitions, trellis, &dpMat);
&context_);
}
- Tensor tMax =
- caffe2::empty(std::vector<int64_t>{1}, at::dtype<float>().device(CPU));
- Tensor tArgMax = caffe2::empty(
- std::vector<int64_t>{1}, at::dtype<int32_t>().device(CPU));
+ Tensor tMax(std::vector<int64_t>{1}, CPU);
+ Tensor tArgMax(std::vector<int64_t>{1}, CPU);
ColwiseMaxAndArg(
trellis.template data<float>(),
1,
std::vector<int32_t> viterbiVec;
viterbiVec.push_back(tArgMax.template data<int32_t>()[0]);
- Tensor bpEntry = caffe2::empty(
- std::vector<int64_t>{block_size},
- at::dtype(backpointers.dtype()).device(CPU));
+ Tensor bpEntry(std::vector<int64_t>{block_size}, CPU);
block_bytesize =
backpointers.size_from_dim(1) * backpointers.dtype().itemsize();
for (auto i = seqLen - 1; i > 0; i--) {
: Operator(std::forward<Args>(args)...) {}
bool RunOnDevice() override {
auto& data = Input(0);
- auto& newBestIndicies = Input(1);
+ auto& newBestIdicies = Input(1);
CAFFE_ENFORCE(
- data.dim() == 2 && newBestIndicies.dim() == 1,
+ data.dim() == 2 && newBestIdicies.dim() == 1,
"predictions should be a 2D matrix and bestPath should be 1D vector");
CAFFE_ENFORCE(
- data.size(0) == newBestIndicies.size(0),
+ data.size(0) == newBestIdicies.size(0),
"predictions and bestPath dimensions not matching");
auto* updatedData = Output(0, data.sizes(), at::dtype<float>());
context_.CopyItemsSameDevice(
data.dtype(), data.numel(), data.template data<float>(), outData);
- Tensor bestScores =
- caffe2::empty(newBestIndicies.sizes(), at::dtype<float>().device(CPU));
- Tensor oldBestIndices = caffe2::empty(
- newBestIndicies.sizes(), at::dtype<int32_t>().device(CPU));
+ Tensor bestScores(CPU);
+ bestScores.ResizeLike(newBestIdicies);
+ Tensor oldBestIndices(CPU);
+ oldBestIndices.ResizeLike(newBestIdicies);
ColwiseMaxAndArg(
data.template data<float>(),
auto block_size = data.numel() / data.size(0);
const int32_t* oldBestIdx = oldBestIndices.template data<int32_t>();
- const int32_t* newIdx = newBestIndicies.template data<int32_t>();
+ const int32_t* newIdx = newBestIdicies.template data<int32_t>();
for (auto i = 0; i < data.dim32(0); i++) {
std::swap(
Output(0)->Resize(walker.size());
// Output(0)->raw_mutable_data(TypeMeta::Make<SharedTensorVectorPtr>()));
- auto* dst = Output(
- 0,
- {static_cast<int64_t>(walker.size())},
- at::dtype<SharedTensorVectorPtr>())
- ->template mutable_data<SharedTensorVectorPtr>();
+ auto* dst = Output(0)->template mutable_data<SharedTensorVectorPtr>();
for (int batchId = 0; batchId < walker.size(); ++batchId) {
dst[batchId] = std::make_shared<std::vector<TensorCPU>>();
// Resize to the final output size
std::vector<void*> destinations(numTensors);
for (int i = 0; i < numTensors; ++i) {
- auto* output = Output(i, {outputDims[i]}, at::dtype(*metas[i]));
- destinations[i] = output->raw_mutable_data(*metas[i]);
+ Output(i)->Resize(outputDims[i]);
+ destinations[i] = Output(i)->raw_mutable_data(*metas[i]);
}
for (int i = 0; i < numRows; ++i) {
auto innerSize = in.size_from_dim(1);
outDim = in.sizes().vec();
outDim[0] = size;
+ auto* out = Output(i);
+ out->Resize(outDim);
void* src =
(char*)in.raw_data() + offset * innerSize * in.dtype().itemsize();
- auto* out = Output(i, {outDim}, at::dtype(in.dtype()));
void* dst = out->raw_mutable_data(in.dtype()); // create the tensor
if (out->numel() == 0) {
continue;
idx++;
}
idx = idxbegin; // reSet
- auto* out = Output(i, {outDim}, at::dtype(in.dtype()));
+ auto* out = Output(i);
+ out->Resize(outDim);
if (out->numel() == 0) {
continue;
}
bool RunOnDevice() override {
auto& a = Input(0);
auto& b = Input(1);
- auto* c = Output(0, a.sizes(), at::dtype(a.dtype()));
+ auto* c = Output(0);
CAFFE_ENFORCE(b.dim() >= 1);
if (a.numel() == 0 && a.size(0) == 0) {
c->CopyFrom(b);
return true;
}
- CAFFE_ENFORCE(IsInputOutputAlias(0, 0), "First argument must be in-place.");
+ CAFFE_ENFORCE(&a == c, "First argument must be in-place.");
CAFFE_ENFORCE(c->dim() == b.dim());
CAFFE_ENFORCE(b.dim() == c->dim());
CAFFE_ENFORCE(a.dtype() == b.dtype());
for (int i = 0; i < numFields; ++i) {
auto& a = Input(1 + i);
auto& b = Input(1 + i + numFields);
- auto* c = Output(i, a.sizes(), at::dtype(a.dtype()));
+ auto* c = Output(i);
CAFFE_ENFORCE(b.dim() >= 1);
if (a.numel() == 0) {
continue;
}
CAFFE_ENFORCE(
- IsInputOutputAlias(1 + i, i),
- "Appended-to arguments must be in-place.");
+ (void*)&a == (void*)c, "Appended-to arguments must be in-place.");
CAFFE_ENFORCE(c->dim() == b.dim());
CAFFE_ENFORCE(b.dim() == c->dim());
CAFFE_ENFORCE(a.dtype() == b.dtype());
for (int i = 0; i < numFields; ++i) {
auto& a = Input(1 + i);
auto& b = Input(1 + i + numFields);
- // Can we create Tensor with numel() == 0?
- auto* c = Output(i, a.sizes(), at::dtype(a.dtype()));
+ auto* c = Output(i);
if (a.numel() == 0 && a.size(0) == 0) {
c->CopyFrom(b);
continue;
const TensorVectorPtr& tensorVector =
OperatorBase::Input<TensorVectorPtr>(TENSOR_VECTOR);
+ auto* tensor = Output(TENSOR);
CAFFE_ENFORCE(!tensorVector->empty());
vector<int64_t> outputDims(tensorVector->at(0).sizes().vec());
outputDims[0] += tensorVector->at(i).sizes()[0];
}
- auto* tensor =
- Output(TENSOR, outputDims, at::dtype(tensorVector->at(0).dtype()));
+ tensor->Resize(outputDims);
int64_t offset = 0;
auto* dst = (char*)tensor->raw_mutable_data(tensorVector->at(0).dtype());
// trim each column to the offset
for (int col = 0; col < walker.fields().size(); ++col) {
auto newOuterSize = walker.fields().at(col).offset();
- // TODO: Remove call to Output(col) since it
- // returns partially initialized Tensor
Output(col)->ShrinkTo(newOuterSize);
}
return true;
template <class InputContext>
bool CopyWithContext() {
// Output is always on CPU
+ auto* output = this->template Output<Tensor>(0, CPU);
auto& input = this->template Input<Tensor>(0, InputContext::GetDeviceType());
- // TODO: is it possible to use OutputTensorCopyFrom?
- auto* output = this->OutputTensor(
- 0, input.sizes(), at::dtype(input.dtype()).device(CPU));
+ output->ResizeLike(input);
context_.CopyItemsToCPU(
input.dtype(),
input.numel(),
bool RunOnDevice() override {
auto& input = Input(0);
+ auto* output = Output(0);
CAFFE_ENFORCE_GE(
input.dim(), axis_, "The rank of the tensor must be >= axis.");
- auto* output = Output(
- 0,
- {input.size_to_dim(axis_), input.size_from_dim(axis_)},
- at::dtype(input.dtype()));
+ output->Resize(input.size_to_dim(axis_), input.size_from_dim(axis_));
context_.CopyItemsSameDevice(
input.dtype(),
input.numel(),
vector<int64_t> outputDims{batchSize, 0};
vector<char*> outputRawData;
for (int i = 0; i < OutputSize(); ++i) {
+ auto* output = Output(i);
outputDims[1] = lengths_[i];
- auto* output = Output(i, outputDims, at::dtype(data.dtype()));
+ output->Resize(outputDims);
char* ptr = static_cast<char*>(output->raw_mutable_data(data.dtype()));
memset(ptr, 0, output->nbytes());
outputRawData.push_back(ptr);
bool LengthsTileOp<CPUContext>::RunOnDevice() {
auto& data = Input(DATA);
auto& lengths = Input(LENGTHS);
+ auto* output = Output(0);
CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTHS must be 1-D");
CAFFE_ENFORCE_GE(data.dim(), 1, "DATA should be at least 1-D");
auto shape = data.sizes().vec();
shape[0] = total_length;
- auto* output = Output(0, shape, at::dtype(data.dtype()));
+ output->Resize(shape);
auto block_bytesize = data.size_from_dim(1) * data.dtype().itemsize();
auto src = static_cast<const char*>(data.raw_data());
bool UnpackSegmentsOp<CPUContext>::DoRunWithType2() {
const auto& data = Input(DATA);
const auto& lengths = Input(LENGTHS);
+ auto* output = Output(0);
CAFFE_ENFORCE_GE(data.dim(), 2, "DATA should be at least 2-D");
CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTH should be 1-D");
shape[0], lengths.size(0), "LENGTH should match DATA in dimension 0");
shape.erase(shape.begin());
shape[0] = total_l;
- auto* output = Output(0, shape, at::dtype(data.dtype()));
+ output->Resize(shape);
// create output tensor
auto* out = static_cast<char*>(output->raw_mutable_data(data.dtype()));
if (!(data.size(0) && data.size(1))) {
int64_t num_seq = lengths.dim(0);
const Data_T* data_ptr = data.data<Data_T>();
const T* lengths_ptr = lengths.data<T>();
+ auto* out = Output(0);
+ Tensor* presence_mask = nullptr;
+ if (return_presence_mask_) {
+ presence_mask = Output(1);
+ }
CAFFE_ENFORCE_GE(data.dim(), 1, "DATA should be at least 1-D");
CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTH should be 1-D");
bool* presence_mask_data = nullptr;
if (return_presence_mask_) {
std::vector<int64_t> presence_shape{lengths.numel(), max_length};
- auto* presence_mask = Output(1, presence_shape, at::dtype<bool>());
+ presence_mask->Resize(presence_shape);
presence_mask_data = presence_mask->template mutable_data<bool>();
}
auto shape = data.sizes().vec(); // Shape of out is batch_size x max_len x ...
shape[0] = max_length;
shape.insert(shape.begin(), lengths.numel());
- auto* out = Output(0, shape, at::dtype(data.dtype()));
- Data_T* out_ptr = static_cast<Data_T*>(out->raw_mutable_data(data.dtype()));
+ out->Resize(shape);
+ Data_T* out_ptr = static_cast<Data_T*>(out->raw_mutable_data(data.meta()));
// Return empty out (with the proper shape) if first dim is 0.
if (!data.dim(0)) {
int64_t num_seq = lengths.dim(0);
const Data_T* data_ptr = data.data<Data_T>();
const T* lengths_ptr = lengths.data<T>();
+ auto* out = Output(0);
CAFFE_ENFORCE_GE(data.dim(), 1, "DATA should be at least 1-D");
CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTH should be 1-D");
shape[0], lengths.dim(0), "LENGTH should match DATA in dimension 0");
shape.erase(shape.begin());
shape[0] = num_cell;
- auto* out = Output(0, shape, at::dtype(data.dtype()));
- Data_T* out_ptr = static_cast<Data_T*>(out->raw_mutable_data(data.dtype()));
+ out->Resize(shape);
+ Data_T* out_ptr = static_cast<Data_T*>(out->raw_mutable_data(data.meta()));
// Return empty out (with the proper shape) if any of the dimensions is 0.
if (data.dim(0) == 0 || data.dim(1) == 0) {
}
CAFFE_ENFORCE_EQ(keysTensor.numel(), totalSize);
- auto* outTensor = Output(0, outShape, at::dtype(meta));
+ auto* outTensor = Output(0);
+ outTensor->Resize(outShape);
auto* outData = static_cast<char*>(outTensor->raw_mutable_data(meta));
const auto blockSize = outTensor->size_from_dim(1);
input.sizes().begin() + main_input.dim() - 1, input.sizes().end());
for (int j = 0; j < partitions; ++j) {
int out_idx = i + j * inputSize;
+ auto output = Output(out_idx);
shape[0] = counts_[j];
- auto output = Output(out_idx, shape, at::dtype(input.dtype()));
+ output->Resize(shape);
out_datas_[out_idx] = output->raw_mutable_data(input.dtype());
}
}
// Specialization when partitions == 1 which just becomes a copy.
for (int i = 0; i < InputSize(); ++i) {
auto& input = Input(i);
- auto* output = Output(i, input.sizes(), at::dtype(input.dtype()));
+ auto& output = *Output(i);
+ output.ResizeLike(input);
context_.CopyItemsSameDevice(
input.dtype(),
input.numel(),
input.raw_data(),
- output->raw_mutable_data(input.dtype()));
+ output.raw_mutable_data(input.dtype()));
}
return true;
}
const int32_t* lengths_data = length_input.template data<int32_t>();
out_length_.resize(partitions);
for (int i = 0; i < partitions; ++i) {
- auto* output = Output(i * InputSize(), elements, at::dtype<int32_t>());
- out_length_[i] = output->template mutable_data<int32_t>();
+ auto& output = *Output(i * InputSize());
+ output.Resize(elements);
+ out_length_[i] = output.template mutable_data<int32_t>();
}
int total_length = 0;
bool RunOnDevice() override {
auto& input = Input(0);
+ auto* output = Output(0);
CAFFE_ENFORCE(input.dim() > 0, "Input must be at least 1D.");
CAFFE_ENFORCE(
for (int i = 1; i < input.sizes().size(); ++i) {
actual_new_shape[i + 1] = input.size(i);
}
- auto* output = Output(0, actual_new_shape, at::dtype(input.dtype()));
+ output->Resize(actual_new_shape);
- if (!IsInputOutputAlias(0, 0)) {
+ if (output != &input) {
// If we are not doing in-place computation, a copy is needed.
context_.CopyItemsSameDevice(
input.dtype(),
bool RunOnDevice() override {
auto& input = Input(0);
+ auto* output = Output(0);
CAFFE_ENFORCE(input.dim() > 1, "Input must be at least 2D.");
for (int i = 1; i < input.sizes().size() - 1; ++i) {
actual_new_shape[i] = input.size(i + 1);
}
- auto* output = Output(0, actual_new_shape, at::dtype(input.dtype()));
+ output->Resize(actual_new_shape);
- if (!IsInputOutputAlias(0, 0)) {
+ if (output != &input) {
// If we are not doing in-place computation, a copy is needed.
context_.CopyItemsSameDevice(
input.dtype(),
ind_vec.erase(std::unique(ind_vec.begin(), ind_vec.end()), ind_vec.end());
indices_size = ind_vec.size();
+ auto* output = Output(0);
auto shape = data.sizes().vec();
shape[0] -= indices_size;
- auto* output = Output(0, shape, at::dtype(data.dtype()));
+ output->Resize(shape);
char* out_ptr = (char*)output->raw_mutable_data(data.dtype());
ind_vec.insert(ind_vec.begin(), -1);
auto& mutex = OperatorBase::Input<std::unique_ptr<std::mutex>>(MUTEX);
std::lock_guard<std::mutex> guard(*mutex);
- // TODO: separate diff for this
auto* output = Output(RESERVOIR);
const auto& input = Input(DATA);
// Hands Input(0) and output 0 to DoRunWithTypeImpl<T> and always returns true.
// NOTE(review): the '-' / '+' prefixed lines look like the removed / added
// sides of a patch excerpt. The '-' form creates the output with Input(0)'s
// sizes and dtype before the call; the '+' form passes Output(0) as-is, so
// presumably DoRunWithTypeImpl sizes the output itself — TODO confirm.
template <typename T>
bool DoRunWithType() {
- DoRunWithTypeImpl<T>(
- Input(0), Output(0, Input(0).sizes(), Input(0).dtype()));
+ DoRunWithTypeImpl<T>(Input(0), Output(0));
return true;
}
}
output->Resize(actual_new_shape);
- if (!IsInputOutputAlias(0, 0)) {
+ if (output != &input) {
// If we are not doing in-place computation, a copy is needed.
context_.CopyItemsSameDevice(
input.dtype(),
features.size(0) == sumLen, "FEATURE and LENGTH should be consistent");
const auto block_size = features.size_from_dim(1);
+ auto* out_features = Output(1 + k);
auto outDim = features.sizes().vec();
outDim.at(0) += needPadding;
- auto* out_features = Output(1 + k, outDim, at::dtype(features.dtype()));
+ out_features->Resize(outDim);
auto dst =
static_cast<char*>(out_features->raw_mutable_data(features.dtype()));
auto src_base = static_cast<const char*>(features.raw_data());
// copy data and add padding index as zero
- Tensor zero =
- caffe2::empty({block_size}, at::dtype(features.dtype()).device(CPU));
+ Tensor zero{CPU};
+ zero.Resize(block_size);
auto zeroPtr = static_cast<char*>(zero.raw_mutable_data(features.dtype()));
memset(zeroPtr, 0, zero.nbytes());
int start_dest = 0;
// it.
std::vector<char*> datas(numFields);
for (int i = 0; i < numFields; ++i) {
- auto* output = Output(i, batchSize_, at::dtype(instance->fieldMetas[i]));
- datas[i] = (char*)output->raw_mutable_data(instance->fieldMetas[i]);
+ Output(i)->Resize(batchSize_);
+ datas[i] = (char*)Output(i)->raw_mutable_data(instance->fieldMetas[i]);
}
int rowsRead = 0;
}
const auto& X = Input(0);
+ auto* Y = Output(0);
const int axis = X.canonical_axis_index(axis_);
// reshape output to be input tiled along the axis
std::vector<std::int64_t> Y_dims = X.sizes().vec();
Y_dims[axis] *= tiles_;
- auto* Y = Output(0, Y_dims, at::dtype<T>());
+ Y->Resize(Y_dims);
// size up to (and not including) axis
const int outer_size = X.size_to_dim(axis);
}
const auto& dY = Input(0);
+ auto* dX = Output(0);
const int axis = dY.canonical_axis_index(axis_);
// reshape output to be input "untiled" along the axis
std::vector<std::int64_t> X_dims = dY.sizes().vec();
CAFFE_ENFORCE_EQ(X_dims[axis] % tiles_, 0);
X_dims[axis] /= tiles_;
- auto* dX = Output(0, X_dims, at::dtype<T>());
+ dX->Resize(X_dims);
// size up to (and not including) axis
const int outer_size = dX->size_to_dim(axis);
bool RunOnDevice() override {
auto& input = Input(0);
+ auto* output = Output(0);
CAFFE_ENFORCE_GE(
input.dim(), 1, "The rank of the tensor must be >= 1.");
- auto* output = Output(0, {input.numel()}, at::dtype(input.dtype()));
+ output->Resize(input.numel());
context_.CopyItemsSameDevice(
input.dtype(),
bool RunOnDevice() override {
auto& input0 = Input(0);
auto& input1 = Input(1);
+ auto* output = Output(0);
CAFFE_ENFORCE_EQ(input0.numel(), input1.numel());
- auto* output = Output(0, input1.sizes(), at::dtype(input0.dtype()));
+ output->ResizeLike(Input(1));
context_.CopyItemsSameDevice(
input0.dtype(),
input0.numel(),
bool DoRunWithType() {
auto& data = Input(DATA);
auto& ranges = Input(RANGES);
+ auto* outputData = Output(0);
+ auto* outputLengths = Output(1);
auto batchSize = ranges.size(0);
CAFFE_ENFORCE(data.dim() == 1, "Data has to be 1-D");
auto* rawData = static_cast<const char*>(data.raw_data());
auto* rangesData = ranges.template data<Index>();
- auto* outputLengths = Output(1, {batchSize}, at::dtype<int32_t>());
+ outputLengths->Resize(batchSize);
auto* outputLengthsPtr = outputLengths->template mutable_data<int32_t>();
size_t start = 0;
size_t blockSize = ranges.size_from_dim(1);
}
size_t outputSize = accumulate(rangesData, 0, ranges.numel());
- auto* outputData =
- Output(0, {static_cast<int64_t>(outputSize)}, at::dtype(data.dtype()));
+ outputData->Resize(outputSize);
auto outputRawData =
static_cast<char*>(outputData->raw_mutable_data(data.dtype()));
auto& items = Input(ITEMS);
auto& lengths = Input(LENGTHS);
auto& indices = Input(INDICES);
+ auto* output = Output(0);
CAFFE_ENFORCE_GE(items.dim(), 1, "ITEMS should be at least 1-D");
CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTHS should be 1-D");
}
auto shape = items.sizes().vec();
shape[0] = total_length;
- auto* output = Output(0, {shape}, at::dtype(items.dtype()));
+ output->Resize(shape);
offsets_.clear();
int64_t running_offset = 0;
}
auto* Y_ref = fp32_op->Output(0);
- auto* Y = OutputTensorCPU_(0, Y_ref->sizes(), at::dtype(Y_ref->dtype()));
+ auto* Y = OutputTensorCPU_(0);
+ Y->ResizeLike(*Y_ref);
fp32_op->context_.CopyItemsSameDevice(
Y_ref->dtype(),
Y_ref->size(),
CAFFE_ENFORCE_EQ(input.sizes().at(0), outputSize);
for (int i = 0; i < outputSize; ++i) {
- outputs[i].push_back(
- caffe2::empty(outputDims, at::dtype(input.dtype()).device(CPU)));
+ outputs[i].push_back(Tensor(outputDims, CPU));
context.CopyItemsToCPU(
input.dtype(),
innerSize,