// Patch hunk from a DoRunWithType() body; the function is only partly visible
// (outputSize and len_length are defined outside these lines).
// Change shown: the up-front Output(0) fetch and the later output->Resize(shape)
// are removed, and the output is obtained in one call that takes the shape and
// at::dtype<T>().
bool DoRunWithType() {
auto& dataInput = Input(0);
auto& lengthsInput = Input(LENGTHS);
- auto* output = Output(0);
CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector");
const int64_t dataSize = dataInput.dim(0);
// Output shape = data shape with the leading dimension replaced by outputSize.
auto shape = dataInput.dims().vec();
shape[0] = outputSize;
- output->Resize(shape);
+ auto* output = Output(0, shape, at::dtype<T>());
T* out_data = output->template mutable_data<T>();
if (len_length <= 0) {
// Second DoRunWithType() hunk with the same visible edit as its siblings:
// `output` is now declared where its shape is known, via
// Output(0, shape, at::dtype<T>()), instead of Output(0) followed by Resize(shape).
// outputSize and len_length come from lines that are not part of this hunk.
bool DoRunWithType() {
auto& dataInput = Input(0);
auto& lengthsInput = Input(LENGTHS);
- auto* output = Output(0);
CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector");
const int64_t dataSize = dataInput.dim(0);
auto shape = dataInput.dims().vec();
// Only dimension 0 differs from the data input's shape.
shape[0] = outputSize;
- output->Resize(shape);
+ auto* output = Output(0, shape, at::dtype<T>());
T* out_data = output->template mutable_data<T>();
if (len_length <= 0) {
// Third DoRunWithType() hunk. Visible edit: drop the early Output(0) and the
// Resize(shape) call; create `output` with Output(0, shape, at::dtype<T>()).
// Unlike the sibling hunks, no mutable_data<T>() call is visible before the
// empty-length check here.
bool DoRunWithType() {
auto& dataInput = Input(0);
auto& lengthsInput = Input(LENGTHS);
- auto* output = Output(0);
CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector");
const int64_t dataSize = dataInput.dim(0);
auto shape = dataInput.dims().vec();
shape[0] = outputSize;
- output->Resize(shape);
+ auto* output = Output(0, shape, at::dtype<T>());
if (len_length <= 0) {
// return early to avoid invalid empty kernel
// Hunk from an operator body that reads WEIGHTS, INDICES and LENGTHS inputs
// (the enclosing function header and dataInput's declaration are not visible).
// Visible edit: `output` is created with its shape and dtype in one Output(...)
// call instead of Output(0) followed by Resize(shape).
auto& weightsInput = Input(WEIGHTS);
auto& indicesInput = Input(INDICES);
auto& lengthsInput = Input(LENGTHS);
- auto* output = Output(0);
CAFFE_ENFORCE_EQ(1, weightsInput.ndim(), "WEIGHTS must be a vector");
CAFFE_ENFORCE_EQ(1, indicesInput.ndim(), "INDICES must be a vector");
auto shape = dataInput.dims().vec();
shape[0] = outputSize;
- output->Resize(shape);
+ auto* output = Output(0, shape, at::dtype<T>());
T* out_data = output->template mutable_data<T>();
if (len_length <= 0) {
// Hunks from a RunOnDevice() override taking data (input 0) and segment_ids
// (input 1). Lines are missing between the visible pieces: `dims` is used on the
// empty-input path before any declaration shown here, and K is not defined in view.
bool RunOnDevice() override {
auto& data = Input(0);
auto& segment_ids = Input(1);
- auto* output = Output(0);
if (segment_ids.size() == 0 || data.size() == 0) {
// Special handling for empty input
if (dims.size() > 0) {
dims[0] = 0;
}
// NOTE(review): the explicit mutable_data<T>() call is removed along with
// Resize; presumably the shape/dtype Output overload takes care of allocation —
// confirm against the Operator::Output definition.
- output->Resize(dims);
- output->template mutable_data<T>();
+ Output(0, dims, at::dtype<T>());
return true;
}
// Non-empty path: leading dimension becomes K + 1.
auto dims = data.dims().vec();
dims[0] = K + 1;
- output->Resize(dims);
+ auto* output = Output(0, dims, at::dtype<T>());
// Clear the output as we will be accumulating the values
math::Set<T, CUDAContext>(
// Hunk from a gradient operator body (function header not visible).
// Visible edit: dataGradsOutput is no longer fetched before the checks and
// resized later; it is created with Output(0, shape, at::dtype<T>()) once the
// shape has been computed. The removed declaration is replaced by a blank line.
auto& segmentGradsInput = Input(0);
auto& lengthsInput = Input(1);
auto& indicesInput = Input(2);
- auto* dataGradsOutput = Output(0);
+
CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector");
const int len_length = lengthsInput.dim(0);
// Shape = segment-grads shape with dim 0 replaced by the number of indices.
auto shape = segmentGradsInput.dims().vec();
int output_0dim = indicesInput.dim(0);
shape[0] = output_0dim;
- dataGradsOutput->Resize(shape);
+ auto* dataGradsOutput = Output(0, shape, at::dtype<T>());
T* out_data = dataGradsOutput->template mutable_data<T>();
if (len_length <= 0) {
// Second gradient-operator hunk with the same inputs (0: segment grads,
// 1: lengths, 2: indices) and the same visible edit: the output pointer is
// declared at the point where `shape` is ready, via the shape/dtype Output call.
auto& segmentGradsInput = Input(0);
auto& lengthsInput = Input(1);
auto& indicesInput = Input(2);
- auto* dataGradsOutput = Output(0);
+
CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector");
const int len_length = lengthsInput.dim(0);
auto shape = segmentGradsInput.dims().vec();
// indicesInput.dim(0) is stored in an int before being written into shape[0].
int output_0dim = indicesInput.dim(0);
shape[0] = output_0dim;
- dataGradsOutput->Resize(shape);
+ auto* dataGradsOutput = Output(0, shape, at::dtype<T>());
T* out_data = dataGradsOutput->template mutable_data<T>();
if (len_length <= 0) {
// Gradient-operator hunk where the inputs are shifted by one (1: segment grads,
// 2: lengths, 3: indices); weightsInput and len_length are declared outside the
// visible lines. Visible edit: Output(0) + Resize(shape) becomes a single
// Output(0, shape, at::dtype<T>()) call after the shape is computed.
auto& segmentGradsInput = Input(1);
auto& lengthsInput = Input(2);
auto& indicesInput = Input(3);
- auto* dataGradsOutput = Output(0);
+
CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector");
CAFFE_ENFORCE_EQ(1, weightsInput.ndim(), "WEIGHTS must be a vector");
auto shape = segmentGradsInput.dims().vec();
int output_0dim = indicesInput.dim(0);
shape[0] = output_0dim;
- dataGradsOutput->Resize(shape);
+ auto* dataGradsOutput = Output(0, shape, at::dtype<T>());
T* out_data = dataGradsOutput->template mutable_data<T>();
if (len_length <= 0) {
// return early to avoid invalid empty kernel
// Gradient-operator hunk whose output takes the full shape of dataInput
// (no leading-dimension override is visible). segmentGradsInput is declared
// outside these lines. Visible edit: the output is created with
// Output(0, shape, at::dtype<T>()) instead of Output(0) + Resize(shape).
auto& lengthsInput = Input(2);
auto& dataInput = Input(3);
auto& dataOutput = Input(0); // based on CPU version
- auto* dataGradsOutput = Output(0);
+
CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector");
int len_length = lengthsInput.dim(0);
CAFFE_ENFORCE(segmentGradsInput.ndim() > 0);
inclusive_scan_length_buffer_.template data<int>();
auto shape = dataInput.dims().vec();
- dataGradsOutput->Resize(shape);
+ auto* dataGradsOutput = Output(0, shape, at::dtype<T>());
const T* in_data = segmentGradsInput.template data<T>();
T* out_data = dataGradsOutput->template mutable_data<T>();
// Hunk from a gradient operator that produces two outputs: data grads (0) and
// weight grads (1). segmentGradsInput and weightsInput are declared outside view.
// Visible edit: only dataGradsOutput is moved to the shape/dtype Output call.
auto& lengthsInput = Input(2);
auto& dataInput = Input(3);
auto& indicesInput = Input(4);
- auto* dataGradsOutput = Output(0);
+
// NOTE(review): weightGradsOutput still uses the single-argument Output(1) plus
// ResizeLike(indicesInput) below, so the two outputs of this function are now
// obtained in different styles — confirm whether that is intentional.
auto* weightGradsOutput = Output(1);
CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector");
CAFFE_ENFORCE_EQ(1, weightsInput.ndim(), "WEIGHTS must be a vector");
auto shape = segmentGradsInput.dims().vec();
int output_0dim = indicesInput.dim(0);
shape[0] = output_0dim;
- dataGradsOutput->Resize(shape);
+ auto* dataGradsOutput = Output(0, shape, at::dtype<T>());
weightGradsOutput->ResizeLike(indicesInput);
T* out_data_grads = dataGradsOutput->template mutable_data<T>();
T* out_weight_grads = weightGradsOutput->template mutable_data<T>();
// Hunk: the second output is produced only when the operator has more than one
// output; lengths_out_ptr stays nullptr otherwise. lengths_size is defined
// outside the visible lines.
// Visible edit: Output(1) + Resize(lengths_size) becomes one call that passes
// {lengths_size} and at::dtype<int32_t>().
int32_t* lengths_out_ptr = nullptr;
if (OutputSize() > 1) {
- auto* lengths_out = Output(1);
- lengths_out->Resize(lengths_size);
+ auto* lengths_out = Output(1, {lengths_size}, at::dtype<int32_t>());
lengths_out_ptr = lengths_out->template mutable_data<int32_t>();
}
// Hunk from an operator that shrinks dimension 0 of `in` by the start and end
// padding widths for each of lengths_size entries. The code that declares `in`,
// `lengths` and lengths_size is outside the visible lines.
lengths_size = lengths.size();
}
// Visible edit: the scoped block that built out_dims and passed it to Resize via
// std::move is removed; out_dims is now a local in the enclosing scope and is
// passed (as an lvalue) to the shape/dtype Output call.
- auto* out = Output(0);
- {
- auto out_dims = in.dims().vec();
- out_dims[0] -= (startPaddingWidth_ + endPaddingWidth_) * lengths_size;
- out->Resize(std::move(out_dims));
- }
+ auto out_dims = in.dims().vec();
+ out_dims[0] -= (startPaddingWidth_ + endPaddingWidth_) * lengths_size;
+ auto* out = Output(0, out_dims, at::dtype<T>());
const auto* in_ptr = in.template data<T>();
auto* out_ptr = out->template mutable_data<T>();
// Optional second output, created only when the operator has more than one output.
int32_t* lengths_out_ptr = nullptr;
if (OutputSize() > 1) {
- auto* lengths_out = Output(1);
- lengths_out->Resize(lengths_size);
+ auto* lengths_out = Output(1, {lengths_size}, at::dtype<int32_t>());
lengths_out_ptr = lengths_out->template mutable_data<int32_t>();
}
// Hunks from a softmax-with-loss style RunOnDevice body (function header not
// visible; lines are missing between the pieces shown).
// Visible edit: avg_loss is no longer fetched at the top and resized later; it is
// created as a tensor with an empty dims vector and float dtype at the point
// where the Resize call used to be.
auto& X = Input(0); // Logits
auto& T = Input(1); // Labels / targets
// NOTE(review): P is still obtained with the single-argument Output(0); where it
// is resized is not visible in this hunk.
auto* P = Output(0); // Probabilities from softmax
- auto* avg_loss = Output(1); // Average loss
// NOTE(review): the removed and added `weights` lines below read identically
// here — presumably a whitespace-only change; confirm against the real patch.
- const float* weights = (InputSize() > 2 ? Input(2).data<float>() : NULL);
+ const float* weights = (InputSize() > 2 ? Input(2).data<float>() : NULL);
const auto canonical_axis = X.canonical_axis_index(axis_);
int N, D;
N = X.size_to_dim(canonical_axis); // batch size
}
}
- avg_loss->Resize(vector<int64_t>());
+ auto* avg_loss =
+ Output(1, vector<int64_t>(), at::dtype<float>()); // Average loss
// Resize the losses_ buffer only when its size does not already equal N.
if (losses_.size() != N) {
losses_.Resize(N);
}
// Hunks from a second softmax-with-loss style body (header not visible; the
// kernel-launch line below is the tail of a statement that starts outside view).
// Visible edit: avg_loss is created with an empty dims vector and float dtype
// right before it is zero-filled, replacing Output(1) + Resize(vector<int64_t>()).
auto& X = Input(0); // Logits
auto& T = Input(1); // Labels / targets
// NOTE(review): P keeps the single-argument Output(0) form in this hunk.
auto* P = Output(0); // Probabilities from softmax
- auto* avg_loss = Output(1); // Average loss
+
const float* weights = (InputSize() > 2 ? Input(2).data<float>() : NULL);
int N, D;
N = X.dim32(0);
context_.cuda_stream()>>>(N, D, W, H, Xdata, Pdata);
// Cross entropy
- avg_loss->Resize(vector<int64_t>());
+ auto* avg_loss =
+ Output(1, vector<int64_t>(), at::dtype<float>()); // Average loss
float* avg_loss_data = avg_loss->template mutable_data<float>();
// Writes 0.0f into the single avg_loss element.
math::Set<float, CUDAContext>(1, 0.0f, avg_loss_data, &context_);
// Hunk: the output takes sparse_values' shape with dimension 0 replaced by
// output_first_dim (both names are defined outside the visible lines), and every
// element is then set to TData(0).
// Visible edit: Output(0) + Resize(shape) becomes Output(0, shape, at::dtype<TData>()).
auto shape = sparse_values.dims().vec();
shape[0] = output_first_dim;
- auto* output = Output(0);
- output->Resize(shape);
+
+ auto* output = Output(0, shape, at::dtype<TData>());
TData* output_data = output->template mutable_data<TData>();
math::Set<TData>(output->size(), TData(0), output_data, &context_);
// Hunk: tail of a stream-output statement, followed by the optional output.
// When the operator has at least one output, min/max/mean/standard deviation are
// staged in a host float array and copied from CPU through the context.
<< standard_deviation << std::endl;
}
if (OutputSize()) {
// Visible edit: Output(0) + Resize(4) becomes Output(0, {4}, at::dtype<float>()).
// NOTE(review): the array below is sized by NUM_STATS while the tensor length is
// the literal 4 and four initializers are listed — presumably NUM_STATS == 4; confirm.
- auto* Y = Output(0);
- Y->Resize(4);
+ auto* Y = Output(0, {4}, at::dtype<float>());
float output_buffer[NUM_STATS] = {result.min, result.max, result.mean,
standard_deviation};
context_.CopyFromCPU<float>(
// Hunks from an operator that writes unique values to the UNIQUE output and,
// optionally, a REMAPPING output. Several lines are missing between the visible
// pieces (e.g. the body of the REMAPPING check and the code that fills `buffer`).
// use dim32 to enforce that it's fine to have remapping of type int
int N = inputTensor.dim32(0);
CAFFE_ENFORCE_EQ(inputTensor.ndim(), 1, "Input should be a vector");
- auto* uniqueTensor = Output(UNIQUE);
int* remapping = nullptr;
if (REMAPPING < OutputSize()) {
if (N <= 0) {
// if the input is empty, we have nothing to do, not even launch kernel.
// Visible edit: the empty case now just calls Output(UNIQUE, {0}, dtype) and
// discards the result; the removed lines resized to 0 and called mutable_data<T>().
- uniqueTensor->Resize(0);
- T* unique = uniqueTensor->template mutable_data<T>();
+ /* auto* uniqueTensor = */ Output(UNIQUE, {0}, at::dtype<T>());
return true;
}
order2.begin());
// K is the pointer distance from `buffer` to new_last.first.
int K = new_last.first - buffer;
- uniqueTensor->Resize(K);
+ auto* uniqueTensor = Output(UNIQUE, {K}, at::dtype<T>());
T* unique = uniqueTensor->template mutable_data<T>();
context_.CopyItemsSameDevice(thrust_unique_buffer_.meta(), K, buffer, unique);
// Hunks from the float/CUDAContext specialization of
// UpsampleBilinearOp::RunOnDevice (batch_size, num_channels, input_width and
// input_height are set in lines that are not visible).
// Visible edit: Y is created after the output extents are known, with an explicit
// 4-element size list and float dtype, replacing Output(0) + 4-argument Resize.
template <>
bool UpsampleBilinearOp<float, CUDAContext>::RunOnDevice() {
const auto& X = Input(0);
- auto* Y = Output(0);
const auto inputDims = X.dims();
CAFFE_ENFORCE_EQ(4, inputDims.size());
}
// Output extents: input extent times the scale, stored into int.
int output_width = input_width * width_scale_;
int output_height = input_height * height_scale_;
- Y->Resize(batch_size, num_channels, output_height, output_width);
+ auto* Y = Output(
+ 0,
+ {batch_size, num_channels, output_height, output_width},
+ at::dtype<float>());
const auto size = Y->size();
UpsampleBilinearKernel<<<
// Hunks from the float/CUDAContext UpsampleBilinearGradientOp::RunOnDevice
// (lines between the dimension check and the scale assignment are not visible,
// nor are the definitions of batch_size, num_channels, output_height, output_width).
// Visible edit: dX is created with an explicit size list and float dtype in place
// of Output(0) + Resize, then filled with 0.0f.
bool UpsampleBilinearGradientOp<float, CUDAContext>::RunOnDevice() {
const auto& dY = Input(0);
const auto& X = Input(1);
- auto* dX = Output(0);
const auto inputDims = dY.dims();
CAFFE_ENFORCE_EQ(4, inputDims.size());
// Scales are read from scales_data: index 0 is height, index 1 is width.
height_scale_ = scales_data[0];
width_scale_ = scales_data[1];
}
- dX->Resize(batch_size, num_channels, output_height, output_width);
+ auto* dX = Output(
+ 0,
+ {batch_size, num_channels, output_height, output_width},
+ at::dtype<float>());
math::Set<float, CUDAContext>(
dX->size(), 0.0f, dX->mutable_data<float>(), &context_);
// Hunk from a RunOnDevice() override whose output is created with an empty dims
// vector and int64_t dtype; the rest of the body is not visible.
// Visible edit: Output(0) + Resize(vector<int64_t>()) is folded into one call.
bool RunOnDevice() override {
auto& input = Input(0);
- auto* output = Output(0);
- output->Resize(vector<int64_t>());
+ auto* output = Output(0, vector<int64_t>(), at::dtype<int64_t>());
auto* output_data = output->template mutable_data<int64_t>();
auto size = input.numel();
// Hunk: tail of the length computation for a start/step range, then output
// creation. diff, step and start are defined outside the visible lines.
} else {
length = static_cast<int>(ceil(diff / step));
}
- auto* output = Output(0);
+
// Match numpy's behavior here.
if (length <= 0) {
// Visible edit: the empty result is now produced by Output(0, {0}, dtype) alone.
// The removed code resized to 0 and called mutable_data<T>() for its side effect.
// NOTE(review): presumably the new overload covers that side effect — confirm.
- output->Resize(0);
- // Called for the side effect of setting the data.
- output->template mutable_data<T>();
+ Output(0, {0}, at::dtype<T>());
return true;
} else {
// `output` is now scoped to this branch and created with its final length.
- output->Resize(length);
+ auto* output = Output(0, {length}, at::dtype<T>());
return DoRunOnDevice<T>(start, step, output);
}
}
"The number of tensors of the input and the output must be the same.");
// Hunks from a sampling operator: output 0 holds int indices, output 1 (optional)
// holds float values. Lines are missing between the pieces shown (the check that
// introduces in_val and the kernel launch are only partly visible).
// Visible edit: out_idx/out_val are created per branch with explicit sizes and
// dtypes instead of being fetched once and resized.
auto& in_weights = Input(0);
- auto* out_idx = Output(0);
+
int batch_size = in_weights.dim(0);
int weights_dim = in_weights.dim(1);
if (batch_size > 0 && weights_dim > 0) {
// Non-empty case: outputs have sizes {batch_size, 1}.
- out_idx->Resize(batch_size, 1);
+ auto* out_idx = Output(0, {batch_size, 1}, at::dtype<int>());
unif_samples_.Resize(batch_size);
const float* in_weights_data = in_weights.data<float>();
"The sampling weights tensor and the sampling values tensor must have the same dimensions.");
in_val_data = in_val.data<float>();
- auto* out_val = Output(1);
- out_val->Resize(batch_size, 1);
+ auto* out_val = Output(1, {batch_size, 1}, at::dtype<float>());
out_val_data = out_val->template mutable_data<float>();
}
out_idx_data,
out_val_data);
} else {
// Empty case: outputs are created with size {0}; the results are discarded and
// the previous explicit mutable_data calls are removed.
- out_idx->Resize(0);
- out_idx->template mutable_data<int>();
+ /* out_idx = */ Output(0, {0}, at::dtype<int>());
if (OutputSize() == 2) {
- auto* out_val = Output(1);
- out_val->Resize(0);
- out_val->template mutable_data<float>();
+ /* out_val = */ Output(1, {0}, at::dtype<float>());
}
}
// Hunk from the float/CUDAContext PSRoIPoolOp::RunOnDevice.
// Visible edit: Y (float) and A (int) are created with explicit sizes and dtypes;
// A reuses Y's dimensions.
bool PSRoIPoolOp<float, CUDAContext>::RunOnDevice() {
auto& X = Input(0); // Input data to pool
auto& R = Input(1); // RoIs
// NOTE(review): the two added comment lines below are what is left of the
// trailing comments on the removed declarations; they no longer sit next to the
// Y/A declarations further down.
- auto* Y = Output(0); // PSRoI pooled data
- auto* A = Output(1); // mapping_channel
+ // PSRoI pooled data
+ // mapping_channel
- Y->Resize(R.dim32(0), output_dim_, pooled_height_, pooled_width_);
- A->Resize(Y->dims());
// NOTE(review): this hunk passes Y->dims() for A, while the RoIPoolFOp hunk in
// the same patch passes Y->sizes() — confirm which accessor is preferred.
+ auto* Y = Output(0, {R.dim32(0), output_dim_, pooled_height_, pooled_width_}, at::dtype<float>());
+ auto* A = Output(1, Y->dims(), at::dtype<int>());
int output_size = Y->size();
PSRoIPoolForward<float><<<CAFFE_GET_BLOCKS(output_size),
CAFFE_CUDA_NUM_THREADS,
// Hunk from the float/CUDAContext RoIPoolFOp::RunOnDevice.
// Visible edit: Y (float) and A (int) are created on each path with explicit
// sizes and dtypes instead of being fetched up front and resized.
bool RoIPoolFOp<float, CUDAContext>::RunOnDevice() {
auto& X = Input(0); // Input data to pool
auto& R = Input(1); // RoIs
- auto* Y = Output(0); // RoI pooled data
- auto* A = Output(1); // argmaxes
if (R.size() == 0) {
// Handle empty rois
// Both outputs get the same sizes with a leading 0. The removed mutable_data
// calls were documented as needed for allocation.
// NOTE(review): presumably the new Output overload allocates — confirm.
- Y->Resize(0, X.dim32(1), pooled_height_, pooled_width_);
- A->Resize(0, X.dim32(1), pooled_height_, pooled_width_);
- // The following mutable_data calls are needed to allocate the tensors
- Y->mutable_data<float>();
- A->mutable_data<int>();
+ std::vector<int64_t> sizes = {0, X.dim32(1), pooled_height_, pooled_width_};
+ /* auto* Y = */ Output(0, sizes, at::dtype<float>());
+ /* auto* A = */ Output(1, sizes, at::dtype<int>());
return true;
}
- Y->Resize(R.dim32(0), X.dim32(1), pooled_height_, pooled_width_);
- A->Resize(Y->dims());
// Non-empty path: A takes its sizes from Y via Y->sizes() (the removed line
// used Y->dims()).
+ auto* Y = Output(0, {R.dim32(0), X.dim32(1), pooled_height_, pooled_width_}, at::dtype<float>()); // RoI pooled data
+ auto* A = Output(1, Y->sizes(), at::dtype<int>()); // argmaxes
int output_size = Y->size();
RoIPoolFForward<float><<<CAFFE_GET_BLOCKS(output_size),
CAFFE_CUDA_NUM_THREADS,
// Hunks from the float/CUDAContext SampleAsOp::RunOnDevice (the rest of the
// CAFFE_ENFORCE call and the code computing `count` are not visible).
// Visible edit: Y is created once out_shape is known, via
// Output(0, out_shape, at::dtype<float>()), instead of Output(0) + Resize.
bool SampleAsOp<float, CUDAContext>::RunOnDevice() {
auto& X = Input(0); // Input data to be sliced
auto& L = Input(1); // Target data that provide the identity
// NOTE(review): the added comment line below is the trailing comment of the
// removed declaration; Y itself is now declared further down.
- auto* Y = Output(0); // Sliced data (Y.dim32(0) = num of (L > 0))
+ // Sliced data (Y.dim32(0) = num of (L > 0))
CAFFE_ENFORCE(
X.dim32(0) == L.dim32(0),
// resize Y
vector<int64_t> out_shape(X.dims().vec());
out_shape[0] = count;
- Y->Resize(out_shape);
+ auto* Y = Output(0, out_shape, at::dtype<float>());
// Elements per entry along dimension 0 of X.
const int len = X.size() / X.dim32(0);
// Hunk from a loss operator body (header and the declaration of Y not visible).
// Visible edit: avg_loss is created with an empty dims vector and float dtype in
// one call, replacing Output(0) + Resize(vector<int64_t>()).
auto& L = Input(2);
// total number of fg boxes across all FPN levels: scalar
auto& S = Input(3);
- auto* avg_loss = Output(0);
- avg_loss->Resize(vector<int64_t>());
+
+ auto* avg_loss = Output(0, vector<int64_t>(), at::dtype<float>());
// When Y has no elements, the single avg_loss value is set to 0.
if (Y.size() == 0){
math::Set<float, CUDAContext>(
1, static_cast<float>(0), avg_loss->mutable_data<float>(), &context_);
// Hunks from the float/CUDAContext SigmoidCrossEntropyLossOp::RunOnDevice; part
// of the CAFFE_ENFORCE message arguments is missing between the visible lines.
// Visible edit: avg_loss is created after the size check with an empty dims
// vector and float dtype, replacing the early Output(0) and later Resize.
bool SigmoidCrossEntropyLossOp<float, CUDAContext>::RunOnDevice() {
auto& X = Input(0);
auto& T = Input(1);
- auto* avg_loss = Output(0);
+
CAFFE_ENFORCE(
X.size() == T.size(),
" vs. ",
T.size(),
")");
- avg_loss->Resize(vector<int64_t>());
+ auto* avg_loss = Output(0, vector<int64_t>(), at::dtype<float>());
// Member buffers: counts_ and losses_ are resized like X; normalizer_ is resized
// with an empty dims vector.
counts_.ResizeLike(X);
losses_.ResizeLike(X);
normalizer_.Resize(vector<int64_t>());
// Hunk from a sigmoid focal loss operator body (header and the declaration of X
// are not visible). X is indexed with dim32(0..3), i.e. it is read as 4-D here.
// Visible edit: avg_loss is created after the dimensions are read, with an empty
// dims vector and float dtype, replacing Output(0) + Resize(vector<int64_t>()).
// Number of positive examples: scalar
auto& wp = Input(2);
// output avg Sigmoid focal loss as mentioned in RetinaNet paper
- auto* avg_loss = Output(0);
+
int N = X.dim32(0);
int D = X.dim32(1);
int H = X.dim32(2);
int W = X.dim32(3);
- avg_loss->Resize(vector<int64_t>());
+ auto* avg_loss = Output(0, vector<int64_t>(), at::dtype<float>());
losses_.ResizeLike(X);
float* avg_loss_data = avg_loss->mutable_data<float>();
// Hunks from a loss operator that takes Y_hat (declared outside view), Y,
// alpha_in and alpha_out; lines are missing after the batch-size comment.
// Visible edit: avg_loss is created after the element-count checks with an empty
// dims vector and float dtype, replacing the early Output(0) and later Resize.
auto& Y = Input(1);
auto& alpha_in = Input(2);
auto& alpha_out = Input(3);
- auto* avg_loss = Output(0);
+
int N = Y.dim32(0);
// Require the same number of elements along axis 0 (batch size), but
CAFFE_ENFORCE_EQ(Y_hat.size(), alpha_in.size());
CAFFE_ENFORCE_EQ(Y_hat.size(), alpha_out.size());
- avg_loss->Resize(vector<int64_t>());
+ auto* avg_loss = Output(0, vector<int64_t>(), at::dtype<float>());
buff_.ResizeLike(Y);
// Difference
// Hunks from a softmax focal loss operator body (header not visible; H and W are
// defined in lines missing from this view).
// Visible edit: P (output 1) and avg_loss (output 0) are created where they used
// to be resized, each with an explicit size and float dtype.
auto& X = Input(0); // Logits
auto& T = Input(1); // Labels
auto& wp = Input(2); // num of foreground
// NOTE(review): the two added comment lines below are the trailing comments of
// the removed declarations; they are no longer attached to any statement.
- auto* avg_loss = Output(0); // average loss as output
- auto* P = Output(1); // softmax probability, going to be re-used in gradient
+ // average loss as output
+ // softmax probability, going to be re-used in gradient
int N = X.dim32(0);
int D = X.dim32(1);
int A = D / num_classes_;
losses_.Resize(N * A * H * W);
- P->Resize(N * D * H * W);
- avg_loss->Resize(vector<int64_t>());
// P is 1-D with N * D * H * W elements. N and D are int; NOTE(review): if H and W
// are int too, the product is evaluated in int before it reaches the size list —
// the removed Resize call had the same expression.
+ auto* P = Output(1, {N * D * H * W}, at::dtype<float>());
+ auto* avg_loss = Output(0, vector<int64_t>(), at::dtype<float>());
math::Set<float, CUDAContext>(
avg_loss->size(), 0.f, avg_loss->mutable_data<float>(), &context_);
math::Set<float, CUDAContext>(
// Hunks from an operator that narrows A spatially to B's height/width. The
// function header is not visible, and the start of the first CAFFE_ENFORCE_GE in
// the else-branch is missing from this view.
// Visible edit: instead of resizing C inside each branch, each branch fills a
// local `sizes` vector and C is created once after the if/else with
// Output(0, sizes, at::dtype<T>()).
// Narrows input 0 (A) spatially to match input 1 (B)
auto& A = Input(0);
auto& B = Input(1);
- auto* C = Output(0);
+
CAFFE_ENFORCE_EQ(A.dim32(0), B.dim32(0), "Input dim 0 must be equal.");
+ std::vector<int64_t> sizes;
if (A.ndim() == B.ndim()) {
// Same rank: dims 0 and 1 must match, A's dims 2/3 must be >= B's; the output
// takes B's sizes (previously C->ResizeLike(B)).
CAFFE_ENFORCE_EQ(A.dim32(1), B.dim32(1), "Input dim 1 must be equal.");
CAFFE_ENFORCE_GE(
A.dim32(2), B.dim32(2), "Input 0 height must be >= input 1 height.");
CAFFE_ENFORCE_GE(
A.dim32(3), B.dim32(3), "Input 0 width must be >= input 1 width.");
-
- C->ResizeLike(B);
+ sizes = B.sizes().vec();
} else {
// For (N, H, W) case
CAFFE_ENFORCE_EQ(A.ndim() - 1, B.ndim(), "Dimension mismatch.");
A.dim32(2), B.dim32(1), "Input 0 height must be >= input 1 height.");
CAFFE_ENFORCE_GE(
A.dim32(3), B.dim32(2), "Input 0 width must be >= input 1 width.");
// Output keeps A's dims 0 and 1 and takes the last two extents from B's dims 1/2.
- C->Resize(A.dim32(0), A.dim32(1), B.dim32(1), B.dim32(2));
+ sizes = {A.dim32(0), A.dim32(1), B.dim32(1), B.dim32(2)};
}
+ auto* C = Output(0, sizes, at::dtype<T>());
int out_width = C->dim32(3);
int out_height = C->dim32(2);
int in_width = A.dim32(3);