int idx,
at::TensorOptions options,
const Tensor& src,
- BaseContext* context = nullptr) {
+ bool async = false) {
Tensor* t = Output<Tensor>(idx, options.device().type());
// TODO:
// We plan to use the following:
CAFFE_ENFORCE(
!t->dtype_initialized() || t->dtype() == src.dtype(),
"We don't allow a change of data type in OutputTensor");
- t->CopyFrom(src, context);
+ t->CopyFrom(src, async);
return t;
}
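Note: the helper now takes an async flag in place of the BaseContext pointer, and the destination device travels in the at::TensorOptions argument. A minimal call, mirroring the IDEEP copy hunk further down, looks like this:

    const auto& X = OperatorBase::Input<Tensor>(0, CPU);
    OutputTensorCopyFrom(0, at::device(CPU), X);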
bool RunOnDevice() override {
auto* output = Output(OUTPUT);
- TensorCPU* axis_info = OperatorBase::Output<TensorCPU>(AXIS_INFO, CPU);
vector<itensor> inputs;
for (int i = 0; i < InputSize(); ++i) {
}
auto axis_vdata = ideep::concat::compute(inputs, axis_, add_axis_, *output);
- axis_info->Resize(vector<int64_t>(1, InputSize()));
+ Tensor* axis_info = OutputTensor(
+ AXIS_INFO,
+ vector<int64_t>(1, InputSize()),
+ at::dtype<int>().device(CPU));
int* axis_data = axis_info->template mutable_data<int>();
for (int i = 0; i < axis_vdata.size(); i++) {
axis_data[i] = axis_vdata[i];
if (BlobIsTensorType(input_blob, CPU)) {
VLOG(2) << "Directly sharing TensorCPU";
const auto& X = OperatorBase::Input<Tensor>(0, CPU);
- auto* Y = OperatorBase::Output<Tensor>(0, CPU);
- Y->CopyFrom(X);
+ OutputTensorCopyFrom(0, at::device(CPU), X);
} else {
const auto& X = OperatorBase::Input<itensor>(0);
- auto* Y = OperatorBase::Output<Tensor>(0, CPU);
- Y->Resize(X.get_dims());
if (X.get_data_type() == itensor::data_type::f32) {
+ std::vector<int64_t> dims;
+ for (int i = 0; i < X.get_dims().size(); ++i) {
+ dims.push_back(X.get_dims()[i]);
+ }
+ auto* Y =
+ OperatorBase::OutputTensor(0, dims, at::dtype<float>().device(CPU));
X.reorder_to(Y->template mutable_data<float>());
} else {
CAFFE_THROW("Unsupported ideep type: ", X.get_data_type());
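Note: the element-wise copy of X.get_dims() into a std::vector<int64_t> bridges ideep's dims type to the int64_t dims the new OutputTensor overload expects; the Y_dims_64 copies in the order-switch hunks below do the same, and the TODO added before SetTensorDescriptor tracks the remaining std::vector<int> -> std::vector<int64_t> migration.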
// Now, actually run the computation.
if (!no_bias_) {
- auto* dbias = Output(BIAS_OR_INPUT_GRAD);
- dbias->Resize(M);
+ auto* dbias = Output(BIAS_OR_INPUT_GRAD, {M}, at::dtype<T_DB>());
CUDNN_ENFORCE(cudnnConvolutionBackwardBias(
cudnn_wrapper_.inline_cudnn_handle(),
cudnnTypeWrapper<T_DY>::kOne(),
// Now, actually run the computation.
if (!no_bias_) {
- auto* dbias = Output(BIAS_OR_INPUT_GRAD);
- dbias->Resize(C);
+ auto* dbias = Output(BIAS_OR_INPUT_GRAD, {C}, at::dtype<T>());
CUDNN_ENFORCE(cudnnConvolutionBackwardBias(
cudnn_wrapper_.inline_cudnn_handle(),
cudnnTypeWrapper<T>::kOne(),
cudnnTensorDescriptor_t data_desc_;
cudnnDropoutDescriptor_t dropout_desc_;
- vector<int64_t> cudnn_input_dims_;
+ at::IntList cudnn_input_dims_;
float ratio_;
bool is_test_;
cudnnTensorDescriptor_t data_desc_;
cudnnDropoutDescriptor_t dropout_desc_;
- vector<int64_t> cudnn_input_dims_;
+ at::IntList cudnn_input_dims_;
Blob* scratch_blob_;
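Note: at::IntList is an alias for c10::ArrayRef<int64_t>, a non-owning view, so cudnn_input_dims_ now refers to the cached tensor's size storage rather than holding its own copy of the dims; this is also why the .vec() calls drop out of the assignments below.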
}
return true;
} else {
- auto* mask = Output(1);
// Reshape tensor descriptors if necessary
- if (X.sizes() != cudnn_input_dims_ && !is_test_) {
+ if (X.sizes() != cudnn_input_dims_) {
CAFFE_ENFORCE(scratch_blob_);
Tensor* states = BlobGetMutableTensor(scratch_blob_, CUDA);
- cudnn_input_dims_ = X.sizes().vec();
+ cudnn_input_dims_ = X.sizes();
CUDNN_ENFORCE(cudnnSetTensor4dDescriptor(
data_desc_,
GetCudnnTensorFormat(StorageOrder::NCHW),
CUDNN_ENFORCE(cudnnDropoutGetReserveSpaceSize(
data_desc_, &reserve_space_size_in_bytes_));
- mask->Resize(reserve_space_size_in_bytes_);
states->Resize(states_size_in_bytes_);
if (!states_initialized_) {
states_initialized_ = true;
}
}
+ auto* mask = Output(
+ 1,
+ {static_cast<int64_t>(reserve_space_size_in_bytes_)},
+ at::dtype<uint8_t>());
CUDNN_ENFORCE(cudnnDropoutForward(
cudnn_wrapper_.inline_cudnn_handle(),
dropout_desc_,
}
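Note: the mask output is now created in a single call with an explicit element count and uint8_t dtype, placed after the reshape block because reserve_space_size_in_bytes_ is filled in by the cudnnDropoutGetReserveSpaceSize query inside that block.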
if (dY.sizes() != cudnn_input_dims_) {
- cudnn_input_dims_ = dY.sizes().vec();
+ cudnn_input_dims_ = dY.sizes();
CUDNN_ENFORCE(cudnnSetTensor4dDescriptor(
data_desc_,
GetCudnnTensorFormat(StorageOrder::NCHW),
CAFFE_ENFORCE(K * N == W.numel(), dimErrorString());
auto* dW = Output(0);
- auto* db = Output(1);
+
dW->ResizeLike(W);
- db->Resize(N);
+ auto* db = Output(1, {N}, at::dtype<T_DB>());
if (X.numel() == 0) {
// generate a zero blob for db and dW when X is empty
CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");
const auto& input = Input(DATA_FUSED_SCALE_BIAS_INT8);
- auto* output = Output(DATA_FLOAT);
const auto input_rows = input.size(0);
const auto input_columns = input.size(1);
// input_columns is the number of values in the original row.
const std::vector<int64_t> output_dimensions = {input_rows,
input_columns - 8};
- output->Resize(output_dimensions);
+ auto* output = Output(DATA_FLOAT, output_dimensions, at::dtype<T>());
const auto output_columns = output->size(1);
const auto* input_data = input.template data<uint8_t>();
}
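Note: most hunks in this diff, above and below, are the same mechanical rewrite of output allocation. In schematic form (the names here are placeholders, not taken from any one operator):

    // before: default-construct the output, then resize and touch the data
    auto* out = Output(IDX);
    out->Resize(dims);
    auto* data = out->template mutable_data<T>();

    // after: dims and dtype are passed up front
    auto* out = Output(IDX, dims, at::dtype<T>());
    auto* data = out->template mutable_data<T>();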
protected:
+ // TODO: std::vector<int> -> std::vector<int64_t>
void SetTensorDescriptor(
const cudnnDataType_t data_type,
const StorageOrder order,
template <typename T>
bool DoRunWithType() {
const auto& X = Input(0);
- auto* Y = Output(0);
+
const int ndim = X.dim();
const int N = X.dim32(0);
const int C = X.dim32(ndim - 1);
Y_dims[0] = N;
Y_dims[1] = C;
std::copy(X_dims.cbegin() + 1, X_dims.cend() - 1, Y_dims.begin() + 2);
- Y->Resize(Y_dims);
+ std::vector<int64_t> Y_dims_64;
+ std::copy(Y_dims.cbegin(), Y_dims.cend(), std::back_inserter(Y_dims_64));
+ auto* Y = Output(0, Y_dims_64, at::dtype<T>());
if (cached_X_dims_ != X_dims) {
cached_X_dims_ = X_dims;
SetTensorDescriptor(
template <typename T>
bool DoRunWithType() {
const auto& X = Input(0);
- auto* Y = Output(0);
+
const int ndim = X.dim();
const int N = X.dim32(0);
const int C = X.dim32(1);
Y_dims[0] = N;
Y_dims[ndim - 1] = C;
std::copy(X_dims.cbegin() + 2, X_dims.cend(), Y_dims.begin() + 1);
- Y->Resize(Y_dims);
+ std::vector<int64_t> Y_dims_64;
+ std::copy(Y_dims.cbegin(), Y_dims.cend(), std::back_inserter(Y_dims_64));
+ auto* Y = Output(0, Y_dims_64, at::dtype<T>());
if (cached_X_dims_ != X_dims) {
cached_X_dims_ = X_dims;
SetTensorDescriptor(
const auto& X = Input(0);
const auto& W = Input(1);
const auto& b = Input(2);
- auto* Y = Output(0);
+
CAFFE_ENFORCE(b.dim() == 1, b.dim());
// batch size
const auto canonical_axis = X.canonical_axis_index(axis_);
DCHECK_LE(canonical_axis + 1, Y_shape_cache_.size());
Y_shape_cache_.resize(canonical_axis + 1);
Y_shape_cache_[canonical_axis] = N;
- Y->Resize(Y_shape_cache_);
+ auto* Y = Output(0, Y_shape_cache_, at::dtype<T_Y>());
CAFFE_ENFORCE(M * N == Y->size(), dimErrorString());
if (X.size() == 0) {
CAFFE_ENFORCE(K * N == W.size());
auto* dW = Output(0);
- auto* db = Output(1);
+
dW->ResizeLike(W);
- db->Resize(N);
+ auto* db = Output(1, {N}, at::dtype<T_DB>());
if (X.size() == 0) {
// generate a zero blob for db and dW when X is empty
CAFFE_ENFORCE_EQ(OutputSize(), size + 1);
bool status = queue->blockingRead(this->Outputs());
if (table_idx_blob_ >= 0) {
- auto* table_idx_blob_out = Output(table_idx_blob_);
- table_idx_blob_out->Resize(1);
+ auto* table_idx_blob_out =
+ Output(table_idx_blob_, {1}, at::dtype<int32_t>());
int32_t* data = table_idx_blob_out->template mutable_data<int32_t>();
data[0] = idx;
}
"be deprecated soon. More specifically, IterOp now "
"requires an explicit in-place input and output.";
- auto* output = OperatorBase::Output<Tensor>(0, CPU);
VLOG(1) << "Initializing iter counter.";
- output->Resize(1);
+ auto* output = OperatorBase::OutputTensor(
+ 0, {1}, at::dtype<int64_t>().device(CPU));
output->template mutable_data<int64_t>()[0] = 0;
}
}
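Note: where an output has to land on a device other than the operator's own, as with the AXIS_INFO tensor of the IDEEP concat op and the iteration counter above, the diff spells the device out via OperatorBase::OutputTensor(idx, dims, at::dtype<T>().device(CPU)); the plain Output(idx, dims, at::dtype<T>()) form used elsewhere presumably defaults to the operator's context device.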
auto& wd = Input(2);
auto& trust = Input(3);
auto& lr_max = Input(4);
- auto* lr_rescaled = Output(0);
- lr_rescaled->Resize(vector<int64_t>{1});
+
+ auto* lr_rescaled = Output(0, vector<int64_t>{1}, at::dtype<T>());
X_norm_tensor_.Resize(1);
T* X_norm_ = X_norm_tensor_.template mutable_data<T>();