auto& Xin = Input(X);
auto& Yin = Input(Y);
auto& DYin = Input(DY);
- auto* DXout = Output(DX);
+
CAFFE_ENFORCE_EQ(Xin.numel(), Yin.numel());
CAFFE_ENFORCE_EQ(DYin.numel(), Yin.numel());
- DXout->ResizeLike(Yin);
+ auto* DXout = Output(DX, Yin.sizes(), at::dtype<float>());
const float* Xdata = Xin.template data<float>();
const float* Ydata = Yin.template data<float>();
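Most hunks in this diff follow one migration: the two-step Output(i) + ResizeLike(src) becomes a single Output(i, sizes, options) call that creates the output with its final shape and dtype. A minimal sketch of the idiom in a standalone CPU operator; the NegateFloatOp name and the float dtype are illustrative, not part of this diff:

#include "caffe2/core/operator.h"

namespace caffe2 {

class NegateFloatOp final : public Operator<CPUContext> {
 public:
  USE_OPERATOR_FUNCTIONS(CPUContext);
  using Operator<CPUContext>::Operator;

  bool RunOnDevice() override {
    const auto& X = Input(0);
    // One call fixes shape and dtype; no separate ResizeLike() follows.
    auto* Y = Output(0, X.sizes(), at::dtype<float>());
    const float* x = X.data<float>();
    float* y = Y->mutable_data<float>();
    for (int64_t i = 0; i < X.numel(); ++i) {
      y[i] = -x[i];
    }
    return true;
  }
};

} // namespace caffe2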
template <>
bool ThresholdedReluOp<float, CPUContext>::RunOnDevice() {
auto& X = Input(0);
- auto* Y = Output(0);
- Y->ResizeLike(X);
+
+ auto* Y = Output(0, X.sizes(), at::dtype<float>());
ConstEigenVectorArrayMap<float> Xvec(X.data<float>(), X.numel());
EigenVectorArrayMap<float> Yvec(
bool ThresholdedReluGradientOp<float, CPUContext>::RunOnDevice() {
auto& Y = Input(0);
auto& dY = Input(1);
- auto* dX = Output(0);
+
CAFFE_ENFORCE_EQ(dY.numel(), Y.numel());
- dX->ResizeLike(Y);
+ auto* dX = Output(0, Y.sizes(), at::dtype<float>());
const float* Ydata = Y.data<float>();
const float* dYdata = dY.data<float>();
int* remapping = nullptr;
if (REMAPPING < OutputSize()) {
- auto* remappingTensor = Output(REMAPPING);
- remappingTensor->ResizeLike(inputTensor);
+ auto* remappingTensor =
+ Output(REMAPPING, inputTensor.sizes(), at::dtype<int>());
remapping = remappingTensor->template mutable_data<int>();
}
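Optional outputs use the same one-call allocation, guarded by OutputSize() so the tensor is only materialized when the net actually bound that slot. A hedged sketch of the pattern above; MASK is a hypothetical output-index constant and X a hypothetical input:

// Inside some RunOnDevice(); MASK names a hypothetical optional output slot.
int* mask = nullptr;
if (MASK < OutputSize()) {
  // Allocate the optional output only when it was requested.
  auto* mask_tensor = Output(MASK, X.sizes(), at::dtype<int>());
  mask = mask_tensor->template mutable_data<int>();
}
// Downstream code checks mask != nullptr before writing through it.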
template <typename T, typename M>
bool DoRunWithType() {
auto& input0 = Input(0);
- auto* output = Output(0);
+
if (InputSize() == 1) {
- output->CopyFrom(input0, true /*async*/);
+ // TODO: better TensorOptions argument passing (e.g. a default argument)
+ OutputTensorCopyFrom(
+ 0,
+ // I'll change the argument order in another diff so that we don't
+ // need to write this out
+ at::dtype(input0.dtype()),
+ input0,
+ true /*async*/);
return true;
}
- output->ResizeLike(input0);
+ auto* output = Output(0, input0.sizes(), at::dtype<T>());
T* output_data = output->template mutable_data<T>();
// Dimension checking
for (int i = 1; i < InputSize(); ++i) {
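The single-input fast path above also switches from output->CopyFrom(input0) to OutputTensorCopyFrom, which resolves the output slot and performs the copy in one call; note the TensorOptions argument currently comes before the source tensor, as the in-line TODO says. A sketch of the call in isolation, grounded in the hunk above; the surrounding op is hypothetical:

// Pass input 0 straight through to output 0, keeping the source dtype.
const auto& in = Input(0);
OutputTensorCopyFrom(
    0,                      // output index
    at::dtype(in.dtype()),  // options: reuse the input's dtype
    in,                     // source tensor
    true /*async*/);        // allow an asynchronous device copy
return true;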
CAFFE_ENFORCE_GT(X0.numel(), 0);
CAFFE_ENFORCE_EQ(weight0.numel(), 1);
const int size = X0.numel();
- auto* Y = Output(0);
- if (Y != &X0) {
- Y->ResizeLike(X0);
- }
+ // Note: removed the aliasing check, since Output already has
+ // caching capability
+ auto* Y = Output(0, X0.sizes(), at::dtype<T>());
T* Y_data = Y->template mutable_data<T>();
if (input_size == 2) {
math::Scale<float, T>(
return true;
}
const auto& X1 = Input(2);
- CAFFE_ENFORCE_NE(
- &X1,
- Y,
+ CAFFE_ENFORCE(
+ !IsInputOutputAlias(2, 0),
"Input #2 is the same as output. If you want to do in-place updates, "
"put the output as input #0.");
const auto& weight1 = Input(3);
CAFFE_ENFORCE_EQ(X1.numel(), size);
CAFFE_ENFORCE_EQ(weight1.numel(), 1);
- if (Y != &X0) {
+ if (!IsInputOutputAlias(0, 0)) {
context_.template CopySameDevice<T>(size, X0.template data<T>(), Y_data);
}
math::Axpby<float, T, Context>(
const std::string err_msg = "Input #" + to_string(i) +
" is the same as output. If you want to do in-place updates, "
"put the output as input #0.";
- CAFFE_ENFORCE_NE(&Xi, Y, err_msg);
+ CAFFE_ENFORCE(!IsInputOutputAlias(i, 0), err_msg);
const auto& weighti = Input(i + 1);
CAFFE_ENFORCE_EQ(Xi.numel(), size);
CAFFE_ENFORCE_EQ(weighti.numel(), 1);
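The second recurring change is the alias check: comparing raw tensor pointers (&Xi vs. Y) becomes IsInputOutputAlias(i, 0), which asks the operator whether an input blob and an output blob are the same. A minimal sketch of an in-place-aware kernel built on it; the class context and the float dtype are illustrative:

// Member of some Operator<CPUContext> subclass (hypothetical).
bool RunOnDevice() override {
  const auto& X = Input(0);
  auto* Y = Output(0, X.sizes(), at::dtype<float>());
  float* y = Y->mutable_data<float>();
  if (!IsInputOutputAlias(0, 0)) {
    // Not running in place, so seed the output with the input's contents.
    context_.CopySameDevice<float>(X.numel(), X.data<float>(), y);
  }
  // ... update y in place ...
  return true;
}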
for (int i = 0; i < InputSize() / 2; i++) {
auto& cur_w = Input(2 * i + 2);
CAFFE_ENFORCE_EQ(cur_w.numel(), 1);
- auto* cur_dX = Output(i);
- cur_dX->ResizeLike(dY);
+
+ auto* cur_dX = Output(i, dY.sizes(), at::dtype<DstType>());
math::Scale<float, DstType, Context>(
size,
CAFFE_ENFORCE_EQ(weight0.numel(), 1);
const int input_size = X0.numel();
SetTensorDescriptor(cudnnTypeWrapper<T>::type, input_size);
- auto* Y = Output(0);
- if (Y != &X0) {
- Y->ResizeLike(X0);
- }
+
+ // Note: removed the aliasing check, since Output already has
+ // caching capability
+ auto* Y = Output(0, X0.sizes(), at::dtype<T>());
T* Y_data = Y->template mutable_data<T>();
T alpha = convert::To<float, T>(0.0f);
T beta = convert::To<float, T>(0.0f);
return true;
}
const auto& X1 = Input(2);
- CAFFE_ENFORCE_NE(
- &X1,
- Y,
+ CAFFE_ENFORCE(
+ !IsInputOutputAlias(2, 0),
"Input #2 is the same as output. If you want to do in-place updates, "
"put the output as input #0.");
const auto& weight1 = Input(3);
CAFFE_ENFORCE_EQ(weight1.numel(), 1);
CopyWeightToHost<T>(weight1.template data<float>(), &alpha);
CopyWeightToHost<T>(weight0.template data<float>(), &beta);
- if (Y == &X0) {
+ if (IsInputOutputAlias(0, 0)) {
CUDNN_ENFORCE(cudnnAddTensor(
cudnn_wrapper_.inline_cudnn_handle(),
&alpha,
const std::string err_msg = "Input #" + to_string(i) +
" is the same as output. If you want to do in-place updates, "
"put the output as input #0.";
- CAFFE_ENFORCE_NE(&Xi, Y, err_msg);
+ CAFFE_ENFORCE(!IsInputOutputAlias(i, 0), err_msg);
const auto& weighti = Input(i + 1);
CAFFE_ENFORCE_EQ(Xi.numel(), input_size);
CAFFE_ENFORCE_EQ(weighti.numel(), 1);
BlobGetMutableTensor(local_output_blobs_[0], Context::GetDeviceType());
const T* output_local_data = local_output->template data<T>();
- Tensor* output = Operator<Context>::Output(0);
- output->ResizeLike(*local_output);
+ Tensor* output =
+ Operator<Context>::Output(0, local_output->sizes(), at::dtype<T>());
T* output_data = output->template mutable_data<T>();
#ifdef _OPENMP
#pragma omp parallel for
BlobGetMutableTensor(local_output_blobs_[0], Context::GetDeviceType());
const T* output_local_data = local_output->template data<T>();
- Tensor* output = Operator<Context>::Output(0);
- output->ResizeLike(*local_output);
+ Tensor* output =
+ Operator<Context>::Output(0, local_output->sizes(), at::dtype<T>());
T* output_data = output->template mutable_data<T>();
#ifdef _OPENMP
#pragma omp parallel for
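Both hunks above route a locally computed tensor through the operator's real output slot; the pragma suggests the copy that follows is a plain element-wise loop, roughly:

#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int64_t i = 0; i < local_output->numel(); ++i) {
  output_data[i] = output_local_data[i];
}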
CAFFE_ENFORCE(M * K == X.size());
CAFFE_ENFORCE(K * N == W.size());
- auto* dW = Output(0);
-
- dW->ResizeLike(W);
+ auto* dW = Output(0, W.sizes(), at::dtype<T_DW>());
auto* db = Output(1, {N}, at::dtype<T_DB>());
if (X.size() == 0) {
&context_);
if (OutputSize() == 3) {
- auto* dX = Output(2);
- dX->ResizeLike(X);
- dX->template mutable_data<T_DX>();
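+ // X is empty in this branch, so creating the dX output with shape and
+ // dtype via Output is enough; the old mutable_data call (and the dX
+ // pointer) is no longer needed.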
+ Output(2, X.sizes(), at::dtype<T_DX>());
}
return true;
// Compute dX
if (OutputSize() == 3) {
- auto* dX = Output(2);
- dX->ResizeLike(X);
+ auto* dX = Output(2, X.sizes(), at::dtype<T_DX>());
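+ // dX takes X's shape: dX(M x K) = dY(M x N) * W, with W used as stored
+ // (N x K) or transposed, depending on TransposeWeight.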
math::Gemm<T_DX, Context, Engine>(
CblasNoTrans,
TransposeWeight ? CblasNoTrans : CblasTrans,
const auto* input_tensor_data = input_tensor.template data<float>();
const auto* val_data = val.template data<float>();
- auto* clipped = Output(0);
- clipped->ResizeLike(input_tensor);
+ auto* clipped = Output(0, input_tensor.sizes(), at::dtype<float>());
float* clipped_tensor_data = clipped->template mutable_data<float>();
if (InputSize() > 2) {
// Output data
-#define CAFFE2_YF_READ_OUTPUT(OUTPUT_NAME, VAR_NAME) \
- auto VAR_NAME##_out_tensor = Output(OUTPUT_##OUTPUT_NAME); \
- VAR_NAME##_out_tensor->ResizeLike(VAR_NAME##_tensor); \
+#define CAFFE2_YF_READ_OUTPUT(OUTPUT_NAME, VAR_NAME) \
+ auto VAR_NAME##_out_tensor = \
+ Output(OUTPUT_##OUTPUT_NAME, VAR_NAME##_tensor.sizes(), at::dtype<T>()); \
VAR_NAME##_out_ = VAR_NAME##_out_tensor->template mutable_data<T>();
CAFFE2_YF_READ_OUTPUT(PARAM, param)
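For reference, the rewritten macro expands for the PARAM/param invocation above to roughly (T being the enclosing op's scalar type):

auto param_out_tensor =
    Output(OUTPUT_PARAM, param_tensor.sizes(), at::dtype<T>());
param_out_ = param_out_tensor->template mutable_data<T>();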
bool BatchPermutationOp<float, CPUContext>::RunOnDevice() {
const auto& X = Input(0);
const auto& indices = Input(1);
- auto* Y = Output(0);
CAFFE_ENFORCE_EQ(indices.dim(), 1, "indices must be 1-d");
CAFFE_ENFORCE_EQ(
indices.dim32(0),
")");
- Y->ResizeLike(X);
+ auto* Y = Output(0, X.sizes(), at::dtype<float>());
const int N = X.dim32(0);
const int C = X.dim32(1);
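After the dimension checks, BatchPermutationOp copies whole samples: row i of Y comes from row indices[i] of X. A sketch of that inner copy under the N/C layout read above; K as the per-sample element count is an assumption of this sketch, not code from the diff:

#include <cstring>

// ... inside RunOnDevice(), after N and C are read ...
const int K = X.numel() / N;  // elements per sample, assumed contiguous
const float* src = X.data<float>();
const int* idx = indices.data<int>();
float* dst = Y->mutable_data<float>();
for (int i = 0; i < N; ++i) {
  std::memcpy(dst + i * K, src + idx[i] * K, K * sizeof(float));
}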