unique_ptr<db::DBReader> owned_reader_;
const db::DBReader* reader_;
- Tensor prefetched_image_{CPU};
- Tensor prefetched_label_{CPU};
+ Tensor prefetched_image_;
+ Tensor prefetched_label_;
vector<Tensor> prefetched_additional_outputs_;
Tensor prefetched_image_on_device_;
Tensor prefetched_label_on_device_;
int crop_;
std::vector<float> mean_;
std::vector<float> std_;
- Tensor mean_gpu_{Context::GetDeviceType()};
- Tensor std_gpu_{Context::GetDeviceType()};
+ Tensor mean_gpu_;
+ Tensor std_gpu_;
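+ // Tensor members are now default-constructed; ReinitializeTensor assigns
+ // shape, dtype, and device at first use.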
bool mirror_;
bool is_test_;
bool use_caffe_datum_;
for (int i = 0; i < num_decode_threads_; ++i) {
randgen_per_thread_.emplace_back(meta_randgen());
}
- prefetched_image_.Resize(
- int64_t(batch_size_),
- int64_t(crop_),
- int64_t(crop_),
- int64_t(color_ ? 3 : 1));
+ ReinitializeTensor(
+ &prefetched_image_,
+ {int64_t(batch_size_),
+ int64_t(crop_),
+ int64_t(crop_),
+ int64_t(color_ ? 3 : 1)},
+ at::dtype<uint8_t>().device(CPU));
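+ // Prefetched images are NHWC uint8 on CPU: batch x crop x crop x
+ // (3 color channels, or 1 for grayscale).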
+ std::vector<int64_t> sizes;
if (label_type_ != SINGLE_LABEL && label_type_ != SINGLE_LABEL_WEIGHTED) {
- prefetched_label_.Resize(int64_t(batch_size_), int64_t(num_labels_));
+ sizes = std::vector<int64_t>{int64_t(batch_size_), int64_t(num_labels_)};
} else {
- prefetched_label_.Resize(vector<int64_t>(1, batch_size_));
+ sizes = std::vector<int64_t>{batch_size_};
}
+ // The data type of prefetched_label_ is not actually known here;
+ // initialize with int as a placeholder.
+ ReinitializeTensor(
+ &prefetched_label_,
+ sizes,
+ at::dtype<int>().device(CPU));
for (int i = 0; i < additional_output_sizes_.size(); ++i) {
prefetched_additional_outputs_on_device_.emplace_back();
// TODO: support color jitter and color lighting in gpu_transform
if (gpu_transform_) {
if (!mean_std_copied_) {
- mean_gpu_.Resize(mean_.size());
- std_gpu_.Resize(std_.size());
+ ReinitializeTensor(
+ &mean_gpu_,
+ {static_cast<int64_t>(mean_.size())},
+ at::dtype<float>().device(Context::GetDeviceType()));
+ ReinitializeTensor(
+ &std_gpu_,
+ {static_cast<int64_t>(std_.size())},
+ at::dtype<float>().device(Context::GetDeviceType()));
context_.template CopyFromCPU<float>(
mean_.size(),
const auto* maskData = mask.data<bool>();
const auto outerSize = mask.size(0);
- indices_.Resize(outerSize);
+ ReinitializeTensor(
+ &indices_, {outerSize}, at::dtype<int64_t>().device(CUDA));
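+ // indices_ is sized for the worst case in which every mask entry is true.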
auto* indicesData = indices_.mutable_data<int64_t>();
size_t numBytes = 0;
auto numint64_t =
static_cast<int64_t>((numBytes + sizeof(int64_t) - 1) / sizeof(int64_t));
// allocate one more int64_t at the end of scratch for storing numOfOutput
- scratch_.Resize(numint64_t + 1);
+ ReinitializeTensor(
+ &scratch_, {numint64_t + 1}, at::dtype<int64_t>().device(CUDA));
auto* scratchData = scratch_.mutable_data<int64_t>();
auto* numOfOutputData = scratchData + numint64_t;
}
private:
- Tensor indices_{CUDA};
- Tensor scratch_{CUDA};
+ Tensor indices_;
+ Tensor scratch_;
};
REGISTER_CUDA_OPERATOR(BooleanMask, BooleanMaskOp<CUDAContext>);
out->Resize(maskSize);
auto* dest = (char*)out->raw_mutable_data(meta);
- hostMasks_.Resize(numMasks);
+ ReinitializeTensor(&hostMasks_, {numMasks}, at::dtype<bool*>().device(CPU));
auto* hostMasksData = hostMasks_.mutable_data<bool*>();
- hostValues_.Resize(numMasks);
+ ReinitializeTensor(
+ &hostValues_, {numMasks}, at::dtype<char*>().device(CPU));
auto* hostValuesData = hostValues_.mutable_data<char*>();
- hostValueSizes_.Resize(numMasks);
+ ReinitializeTensor(
+ &hostValueSizes_, {numMasks}, at::dtype<int>().device(CPU));
auto* hostValueSizesData = hostValueSizes_.mutable_data<int>();
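+ // These CPU tensors stage per-mask device pointers and value sizes; they
+ // are copied to their GPU counterparts via CopyFrom below.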
for (int i = 0; i < numMasks; ++i) {
auto& mask = Input(i * 2);
values_.CopyFrom(hostValues_);
valueSizes_.CopyFrom(hostValueSizes_);
- indices_.Resize(maskSize);
+ ReinitializeTensor(&indices_, {maskSize}, at::dtype<int>().device(CUDA));
auto* indicesData = indices_.mutable_data<int>();
ComputeIndicesKernel<<<
}
private:
- Tensor indices_{CUDA};
+ Tensor indices_;
Tensor masks_{CUDA};
Tensor values_{CUDA};
Tensor valueSizes_{CUDA};
- Tensor hostMasks_{CPU};
- Tensor hostValues_{CPU};
- Tensor hostValueSizes_{CPU};
+ Tensor hostMasks_;
+ Tensor hostValues_;
+ Tensor hostValueSizes_;
};
REGISTER_CUDA_OPERATOR(BooleanUnmask, BooleanUnmaskOp<CUDAContext>);
const int W = X.ndim() > 3 ? X.dim32(3) : 1;
const int D = X.ndim() > 4 ? X.dim32(4) : 1;
-
-
-
const auto Xarr = X.data<float>();
const auto dYarr = dY.data<float>();
const auto meanArr = mean.data<float>();
const auto numBlocksPerChannel = CAFFE_GET_BLOCKS(valsPerChannel);
const auto numBlocksTotal = numBlocksPerChannel * N * C;
- dBiasScratch_.Resize(numBlocksTotal);
- dScaleScratch_.Resize(numBlocksTotal);
+ ReinitializeTensor(
+ &dBiasScratch_, {numBlocksTotal}, at::dtype<float>().device(CUDA));
+ ReinitializeTensor(
+ &dScaleScratch_, {numBlocksTotal}, at::dtype<float>().device(CUDA));
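+ // One partial result per CUDA block (numBlocksPerChannel * N * C in
+ // total); presumably reduced into the final scale/bias gradients by a
+ // follow-up reduction.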
ChannelBackpropStatsBlockKernel<CAFFE_CUDA_NUM_THREADS>
<<<numBlocksTotal, CAFFE_CUDA_NUM_THREADS, 0, context_.cuda_stream()>>>(
INPUT_TAGS(INPUT, SAVED_MEAN, SAVED_INV_STDDEV, OUTPUT_GRAD);
OUTPUT_TAGS(SCALE_GRAD, BIAS_GRAD);
- Tensor dBiasScratch_{Context::GetDeviceType()};
- Tensor dScaleScratch_{Context::GetDeviceType()};
+ Tensor dBiasScratch_;
+ Tensor dScaleScratch_;
};
} // namespace caffe2
const int W = X.ndim() > 3 ? X.dim32(3) : 1;
const int D = X.ndim() > 4 ? X.dim32(4) : 1;
-
-
-
const auto X_arr = X.data<float>();
const auto valsPerChannel = H * W * D;
const auto numBlocksPerChannel = CAFFE_GET_BLOCKS(valsPerChannel);
const auto numBlocksTotal = numBlocksPerChannel * N * C;
- sumScratch_.Resize(numBlocksTotal);
- sumsqScratch_.Resize(numBlocksTotal);
+ ReinitializeTensor(
+ &sumScratch_, {numBlocksTotal}, at::dtype<float>().device(CUDA));
+ ReinitializeTensor(
+ &sumsqScratch_, {numBlocksTotal}, at::dtype<float>().device(CUDA));
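+ // Per-block partial sums and sums of squares, later reduced into the
+ // per-channel SUM and SUMSQ outputs.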
auto sum = Output(SUM, {C}, at::dtype<float>());
auto sumsq = Output(SUMSQ, {C}, at::dtype<float>());
INPUT_TAGS(INPUT);
OUTPUT_TAGS(SUM, SUMSQ);
- Tensor sumScratch_{Context::GetDeviceType()};
- Tensor sumsqScratch_{Context::GetDeviceType()};
+ Tensor sumScratch_;
+ Tensor sumsqScratch_;
};
} // namespace caffe2
bool RunOnDeviceWithOrderNHWC() override;
private:
- Tensor col_buffer_{Context::GetDeviceType()};
- Tensor bias_multiplier_{Context::GetDeviceType()};
+ Tensor col_buffer_;
+ Tensor bias_multiplier_;
Tensor img_shape_device_{Context::GetDeviceType()};
Tensor col_buffer_shape_device_{Context::GetDeviceType()};
bool no_bias_;
col_buffer_shape.push_back(C / group_ * kernel_dims_size);
col_buffer_shape.insert(
col_buffer_shape.end(), output_dims.begin(), output_dims.end());
- col_buffer_.Resize(col_buffer_shape);
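+ // col_buffer_shape is vector<int>, but ReinitializeTensor takes
+ // at::IntArrayRef (int64_t), so widen the shape first.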
+ vector<int64_t> col_buffer_shape_64;
+ std::copy(
+ col_buffer_shape.cbegin(),
+ col_buffer_shape.cend(),
+ std::back_inserter(col_buffer_shape_64));
+ ReinitializeTensor(
+ &col_buffer_,
+ col_buffer_shape_64,
+ at::dtype<T>().device(Context::GetDeviceType()));
if (kernel_.size() != 2) {
+ // TODO: SetDeviceTensor accept vector<int64_t>
SetDeviceTensor(img_shape, &img_shape_device_);
SetDeviceTensor(col_buffer_shape, &col_buffer_shape_device_);
}
T* dbias_data = nullptr;
if (!no_bias_) {
auto* dbias = Output(BIAS_OR_INPUT_GRAD, {M}, at::dtype<T>());
- if (bias_multiplier_.numel() != output_image_size) {
- // If the helper bias multiplier is not M, reshape and fill it with one.
- bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
- math::Set<T, Context>(
- output_image_size,
- static_cast<T>(1),
- bias_multiplier_.template mutable_data<T>(),
- &context_);
- }
+ // Removed the check for whether bias_multiplier_ already has the correct
+ // size; ReinitializeTensor is a no-op when it does.
+ ReinitializeTensor(
+ &bias_multiplier_,
+ vector<int64_t>(1, output_image_size),
+ at::dtype<T>().device(Context::GetDeviceType()));
+ math::Set<T, Context>(
+ output_image_size,
+ static_cast<T>(1),
+ bias_multiplier_.template mutable_data<T>(),
+ &context_);
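+ // Note that the fill with ones above now runs on every call, not only
+ // when the size changed.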
dbias_data = dbias->template mutable_data<T>();
math::Set<T, Context>(dbias->numel(), 0, dbias_data, &context_);
}
vector<int> col_buffer_shape(output_dims.size() + 1);
std::copy(output_dims.cbegin(), output_dims.cend(), col_buffer_shape.begin());
col_buffer_shape.back() = C * kernel_dims_size;
- col_buffer_.Resize(col_buffer_shape);
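+ // As above, widen the vector<int> shape to int64_t for ReinitializeTensor.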
+ vector<int64_t> col_buffer_shape_64;
+ std::copy(
+ col_buffer_shape.cbegin(),
+ col_buffer_shape.cend(),
+ std::back_inserter(col_buffer_shape_64));
+ ReinitializeTensor(
+ &col_buffer_,
+ col_buffer_shape_64,
+ at::dtype<T>().device(Context::GetDeviceType()));
if (kernel_.size() != 2) {
SetDeviceTensor(img_shape, &img_shape_device_);
auto* dbias = Output(BIAS_OR_INPUT_GRAD, {M}, at::dtype<T>());
dbias_data = dbias->template mutable_data<T>();
math::Set<T, Context>(dbias->numel(), 0, dbias_data, &context_);
- if (bias_multiplier_.numel() != output_image_size) {
- // If the helper bias multiplier is not M, reshape and fill it with one.
- bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
- math::Set<T, Context>(
- output_image_size,
- static_cast<T>(1),
- bias_multiplier_.template mutable_data<T>(),
- &context_);
- }
+ // Removed the check for whether bias_multiplier_ already has the correct
+ // size; ReinitializeTensor is a no-op when it does.
+ ReinitializeTensor(
+ &bias_multiplier_,
+ vector<int64_t>(1, output_image_size),
+ at::dtype<T>().device(Context::GetDeviceType()));
+ math::Set<T, Context>(
+ output_image_size,
+ static_cast<T>(1),
+ bias_multiplier_.template mutable_data<T>(),
+ &context_);
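+ // As above, the fill now runs on every call rather than only when the
+ // size changed.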
}
for (int image_id = 0; image_id < N; ++image_id) {
bool RunOnDeviceWithOrderNHWC() override;
private:
- Tensor col_buffer_{Context::GetDeviceType()};
- Tensor bias_multiplier_{Context::GetDeviceType()};
+ Tensor col_buffer_;
+ Tensor bias_multiplier_;
// Input: X, W, b
// Output: Y
INPUT_TAGS(INPUT, FILTER, BIAS);
bool RunOnDeviceWithOrderNHWC() override;
private:
- Tensor col_buffer_{Context::GetDeviceType()};
- Tensor bias_multiplier_{Context::GetDeviceType()};
+ Tensor col_buffer_;
+ Tensor bias_multiplier_;
const bool no_bias_;
// input: X, W, dY
// output: dW, optionally db and dX
CAFFE_ENFORCE(
bias.dim32(0) == C,
"bias dimension must be equal to output channel number");
- if (bias_multiplier_.numel() != output_image_size) {
- bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
+ ReinitializeTensor(
+ &bias_multiplier_,
+ {1, output_image_size},
+ at::dtype<T>().device(Context::GetDeviceType()));
T* bm_data = bias_multiplier_.template mutable_data<T>();
math::Set<T, Context>(
output_image_size,
static_cast<T>(1),
bm_data,
&context_);
- }
}
const T* Xdata = X.template data<T>();
T* Ydata = Y->template mutable_data<T>();
auto f = [&](Tensor* col_buffer) {
- col_buffer->Resize(
- vector<int64_t>{C, this->kernel_h(), this->kernel_w(), H, W});
+ ReinitializeTensor(
+     col_buffer,
+     vector<int64_t>{C, this->kernel_h(), this->kernel_w(), H, W},
+     at::dtype<T>().device(Context::GetDeviceType()));
T* col_buffer_data = col_buffer->template mutable_data<T>();
for (auto image_id = 0; image_id < N; ++image_id) {
// Weight term
CAFFE_ENFORCE(
bias.dim32(0) == C,
"bias dimension must be equal to output channel number");
- if (bias_multiplier_.numel() != output_image_size) {
- bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
+ // TODO(jerryzh): is it OK to remove the check that numel equals output_image_size?
+ ReinitializeTensor(
+ &bias_multiplier_,
+ {1, output_image_size},
+ at::dtype<T>().device(Context::GetDeviceType()));
T* bm_data = bias_multiplier_.template mutable_data<T>();
math::Set<T, Context>(
output_image_size,
static_cast<T>(1),
bm_data,
&context_);
- }
}
const T* Xdata = X.template data<T>();
const T* filter_data = filter.template data<T>();
T* Ydata = Y->template mutable_data<T>();
auto f = [&](Tensor* /*col_buffer*/) {
- col_buffer_.Resize(
- vector<int64_t>{H, W, this->kernel_h(), this->kernel_w(), C});
+ ReinitializeTensor(
+ &col_buffer_,
+ vector<int64_t>{H, W, this->kernel_h(), this->kernel_w(), C},
+ at::dtype<T>().device(Context::GetDeviceType()));
T* col_buffer_data = col_buffer_.template mutable_data<T>();
for (auto image_id = 0; image_id < N; ++image_id) {
// Weight term
const int kernel_dim = C * this->kernel_h() * this->kernel_w();
const int output_image_size = dY.dim32(2) * dY.dim32(3);
// The col buffer is stored in CHW order as well
- col_buffer_.Resize(
- vector<int64_t>{C, this->kernel_h(), this->kernel_w(), H, W});
+ ReinitializeTensor(
+ &col_buffer_,
+ vector<int64_t>{C, this->kernel_h(), this->kernel_w(), H, W},
+ at::dtype<T>().device(Context::GetDeviceType()));
if (!no_bias_) {
auto* dbias = Output(BIAS_OR_INPUT_GRAD);
dbias->Resize(C);
- if (bias_multiplier_.numel() != output_image_size) {
- bias_multiplier_.Resize(1, output_image_size);
- T* bm_data = bias_multiplier_.template mutable_data<T>();
- math::Set<T, Context>(
- output_image_size,
- static_cast<T>(1),
- bm_data,
- &context_);
- }
+ // TODO(jerryzh): is it OK to remove the check that numel equals output_image_size?
+ ReinitializeTensor(
+ &bias_multiplier_,
+ {1, output_image_size},
+ at::dtype<T>().device(Context::GetDeviceType()));
+ T* bm_data = bias_multiplier_.template mutable_data<T>();
+ math::Set<T, Context>(
+ output_image_size,
+ static_cast<T>(1),
+ bm_data,
+ &context_);
}
T* col_buffer_data = col_buffer_.template mutable_data<T>();
const T* Xdata = X.template data<T>();
const int kernel_dim = C * this->kernel_h() * this->kernel_w();
const int output_image_size = dY.dim32(1) * dY.dim32(2);
// The col buffer is stored in HWC order as well
- col_buffer_.Resize(
- vector<int64_t>{H, W, this->kernel_h(), this->kernel_w(), C});
+ ReinitializeTensor(
+ &col_buffer_,
+ vector<int64_t>{H, W, this->kernel_h(), this->kernel_w(), C},
+ at::dtype<T>().device(Context::GetDeviceType()));
if (!no_bias_) {
auto* dbias = Output(BIAS_OR_INPUT_GRAD);
dbias->Resize(C);
- if (bias_multiplier_.numel() != output_image_size) {
- bias_multiplier_.Resize(1, output_image_size);
- T* bm_data = bias_multiplier_.template mutable_data<T>();
- math::Set<T, Context>(
- output_image_size,
- static_cast<T>(1),
- bm_data,
- &context_);
- }
+ // TODO(jerryzh): is it OK to remove the check that numel equals output_image_size?
+ ReinitializeTensor(
+ &bias_multiplier_,
+ {1, output_image_size},
+ at::dtype<T>().device(Context::GetDeviceType()));
+ T* bm_data = bias_multiplier_.template mutable_data<T>();
+ math::Set<T, Context>(
+ output_image_size,
+ static_cast<T>(1),
+ bm_data,
+ &context_);
}
T* col_buffer_data = col_buffer_.template mutable_data<T>();
const T* Xdata = X.template data<T>();
private:
Tensor col_buffer_{Context::GetDeviceType()};
- Tensor bias_multiplier_{Context::GetDeviceType()};
+ Tensor bias_multiplier_;
Tensor img_shape_device_{Context::GetDeviceType()};
Tensor col_buffer_shape_device_{Context::GetDeviceType()};
// Input: X, o, W, b
bool RunOnDeviceWithOrderNCHW() override;
private:
- Tensor col_buffer_{Context::GetDeviceType()};
- Tensor bias_multiplier_{Context::GetDeviceType()};
+ Tensor col_buffer_;
+ Tensor bias_multiplier_;
Tensor img_shape_device_{Context::GetDeviceType()};
Tensor col_buffer_shape_device_{Context::GetDeviceType()};
bool no_bias_;
// If the helper bias multiplier is not image size, reshape and fill it
// with
// one.
- bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
+ ReinitializeTensor(
+ &bias_multiplier_,
+ vector<int64_t>(1, output_image_size),
+ at::dtype<T>().device(Context::GetDeviceType()));
math::Set<T, Context>(
output_image_size,
static_cast<T>(1),
col_buffer_shape.push_back(C * kernel_dims_size);
col_buffer_shape.insert(
col_buffer_shape.end(), output_dims.begin(), output_dims.end());
- col_buffer_.Resize(col_buffer_shape);
+ ReinitializeTensor(
+ &col_buffer_,
+ col_buffer_shape,
+ at::dtype<T>().device(Context::GetDeviceType()));
const int col_buffer_offset = col_buffer_.size() / group_;
auto* dbias = Output(BIAS_OR_INPUT_GRAD, {M}, at::dtype<T>());
if (bias_multiplier_.size() != output_image_size) {
// If the helper bias multiplier is not M, reshape and fill it with one.
- bias_multiplier_.Resize(vector<int64_t>(1, output_image_size));
+ ReinitializeTensor(
+ &bias_multiplier_,
+ vector<int64_t>(1, output_image_size),
+ at::dtype<T>().device(Context::GetDeviceType()));
math::Set<T, Context>(
output_image_size,
static_cast<T>(1),