bool RunOnDevice() override {
auto& input = Input(0);
- auto* output = Output(0);
+
auto* input_data = input.template data<int32_t>();
CAFFE_ENFORCE_EQ(input.dim(), 1, "Input must be a vector.");
auto len_sum = std::accumulate(input_data, input_data + input.numel(), 0);
- output->Resize(len_sum);
+ auto* output = Output(0, {len_sum}, at::dtype<int32_t>());
auto* output_data = output->template mutable_data<int32_t>();
int32_t offset = 0;
}
const auto dupSize = dupIndices.size();
- auto* output = Output(0);
- output->Resize(dupSize);
+
+ auto* output = Output(0, {static_cast<int64_t>(dupSize)}, at::dtype<int64_t>());
auto* out_ptr = output->template mutable_data<int64_t>();
for (int64_t i = 0; i < dupSize; ++i) {
out_ptr[i] = dupIndices[i];
bool FlexibleTopKOp<T, Context>::RunOnDevice() {
auto& input = Input(0);
auto& k = Input(1);
- auto* values = Output(0);
- auto* indices = Output(1);
const T* input_data = input.template data<T>();
const int64_t* k_data = k.template data<int64_t>();
k_data[i]);
output_size += k_data[i];
}
- values->Resize(output_size);
- indices->Resize(output_size);
+ auto* values = Output(0, {output_size}, at::dtype<T>());
+ auto* indices = Output(1, {output_size}, at::dtype<int64_t>());
T* values_data = values->template mutable_data<T>();
int64_t* indices_data = indices->template mutable_data<int64_t>();
auto& k = Input(1);
auto& values = Input(2);
auto& indices = Input(3);
- auto* output = Output(0);
const int64_t* k_data = k.template data<int64_t>();
const T* values_data = values.template data<T>();
// Resize the output tensor to the size of original_input and initialize it with 0
CAFFE_ENFORCE_GT(original_input.dim(), 0);
vector<int64_t> original_dims = original_input.sizes().vec();
- output->Resize(original_dims);
+ auto* output = Output(0, original_dims, at::dtype<T>());
T* output_data = output->template mutable_data<T>();
math::Set<T, Context>(
output->numel(), static_cast<T>(0), output_data, &context_);
const auto& X = Input(0);
const auto& W = Input(1);
const auto& b = Input(2);
- auto* Y = Output(0);
+
CAFFE_ENFORCE(b.dim() == 1, b.dim());
// batch size
const auto canonical_axis = X.canonical_axis_index(axis_);
DCHECK_LE(canonical_axis + 1, Y_shape_cache_.size());
Y_shape_cache_.resize(canonical_axis + 1);
Y_shape_cache_[canonical_axis] = N;
- Y->Resize(Y_shape_cache_);
+ auto* Y = Output(0, Y_shape_cache_, at::dtype<T_Y>());
CAFFE_ENFORCE(M * N == Y->numel(), dimErrorString());
if (X.numel() == 0) {
CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");
const auto& input = Input(DATA_FLOAT);
- auto* output = Output(DATA_FUSED_SCALE_BIAS_INT8);
const auto input_rows = input.size(0);
const auto input_columns = input.size(1);
// | number_of_columns | 4B | 4B |
const std::vector<int64_t> output_dimensions = {input_rows,
input_columns + 8};
- output->Resize(output_dimensions);
+ auto* output = Output(DATA_FUSED_SCALE_BIAS_INT8, output_dimensions, at::dtype<uint8_t>());
const auto* input_data = input.template data<T>();
auto* output_data = output->template mutable_data<uint8_t>();
CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");
const auto& input = Input(DATA_FLOAT);
- auto* output = Output(DATA_FUSED_QUANTIZED);
CAFFE_ENFORCE_EQ(
input.dim(),
size_t segment_size = (input_columns + data_per_byte - 1) / data_per_byte;
const std::vector<int64_t> output_dimensions = {
input_rows, 10 + static_cast<int64_t>(segment_size)};
- output->Resize(output_dimensions);
+ auto* output =
+ Output(DATA_FUSED_QUANTIZED, output_dimensions, at::dtype<uint8_t>());
const auto* input_data = input.template data<float>();
auto* output_data = output->template mutable_data<uint8_t>();
CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");
const auto& input = Input(DATA_FUSED_QUANTIZED);
- auto* output = Output(DATA_FLOAT);
+
CAFFE_ENFORCE_EQ(input.dim(), 2, "Expect input to be a matrix.");
CAFFE_ENFORCE_GE(
input.numel(),
const size_t output_columns = (input_columns - 10) * (8 / bitwidth) - tail;
const std::vector<int64_t> output_dimensions = {
input_rows, static_cast<int64_t>(output_columns)};
- output->Resize(output_dimensions);
+ auto* output = Output(DATA_FLOAT, output_dimensions, at::dtype<float>());
auto* output_data = output->template mutable_data<float>();
for (size_t row = 0; row < input_rows; ++row) {
math::decompress_and_dequantize(
bool DoRunWithType() {
const auto& data = Input(DATA);
const auto& indices = Input(INDICES);
- auto* output = Output(0);
CAFFE_ENFORCE_EQ(data.dim(), 2, "DATA must be a matrix");
CAFFE_ENFORCE_EQ(indices.dim(), 1, "INDICES must be a vector");
// Subtract 8 from the #columns of data for the 4 bytes for scale and 4
// bytes for bias that we use in the fused representation (per row).
const std::vector<int64_t> shape = {indices.size(0), data.size(1) - 8};
- output->Resize(shape);
+ auto* output = Output(0, shape, at::dtype<float>());
int block_size = shape[1];
auto block_bytesize = data.size_from_dim(1) * data.dtype().itemsize();
const auto& bbox_deltas = Input(1);
const auto& im_info_tensor = Input(2);
const auto& anchors = Input(3);
- auto* out_rois = Output(0);
- auto* out_rois_probs = Output(1);
CAFFE_ENFORCE_EQ(scores.dim(), 4, scores.dim());
CAFFE_ENFORCE(scores.template IsType<float>(), scores.dtype().name());
im_info_tensor.size(1));
const int roi_col_count = box_dim + 1;
- out_rois->Resize(0, roi_col_count);
- out_rois_probs->Resize(0);
+ auto* out_rois = Output(0, {0, roi_col_count}, at::dtype<float>());
+ auto* out_rois_probs = Output(1, {0}, at::dtype<float>());
std::vector<ERArrXXf> im_boxes(num_images);
std::vector<EArrXf> im_probs(num_images);
bool RunOnDevice() {
auto& X = Input(0);
- auto* Y = Output(0);
+
vector<int64_t> Yshape;
Yshape.insert(Yshape.end(), X.sizes().begin(), X.sizes().end());
const int split_index = dim_ == -1 ? Yshape.size() - 1 : dim_;
const int M = X.size_to_dim(split_index);
const int N = X.size_from_dim(split_index + 1);
Yshape[split_index] = split_dim_size;
- Y->Resize(Yshape);
+ auto* Y = Output(0, Yshape, at::dtype<T>());
ComputeGlu(
M,
split_dim_size,
T* mu_data = nullptr;
T* rsig_data = nullptr;
if (OutputSize() == 3) {
- auto* mu = Output(MU);
- auto* rsig = Output(INV_SIGMA);
- mu->Resize(N, G);
- rsig->Resize(N, G);
+ auto* mu = Output(MU, {N, G}, at::dtype<T>());
+ auto* rsig = Output(INV_SIGMA, {N, G}, at::dtype<T>());
mu_data = mu->template mutable_data<T>();
rsig_data = rsig->template mutable_data<T>();
} else {
const auto& W = Input(1);
const auto& b = Input(2);
auto& label = Input(3);
- auto* Y = Output(0);
- auto* intermediate_output = Output(1);
// Batch size
int M = X.dim() > 1 ? X.dim32(0) : 1;
// Sum of output dimensions of all hierarchy nodes
int N = W.dim32(0);
CAFFE_ENFORCE_EQ(N, b.dim32(0));
- Y->Resize(M);
+ auto* Y = Output(0, {M}, at::dtype<float>());
auto* Ydata = Y->template mutable_data<float>();
math::Set<float, CPUContext>(M, 0.f, Ydata, &context_);
const auto* labeldata = label.data<int>();
auto hierarchy = getHierarchyForLabels(M, labeldata, hierarchy_all_map_);
int int_output_size = getIntermediateOutputSize(labeldata, M, hierarchy);
- intermediate_output->Resize(int_output_size);
+ auto* intermediate_output = Output(1, {int_output_size}, at::dtype<float>());
float* int_output_data = intermediate_output->template mutable_data<float>();
int int_output_offset = 0;
auto& X = Input(0);
const auto& W = Input(1);
const auto& b = Input(2);
- auto* Y_names = Output(0);
- auto* Y_scores = Output(1);
+
// Batch size
int M = X.dim() > 1 ? X.dim32(0) : 1;
// Input feature dimension
// Sum of output dimensions of all hierarchy nodes
int N = W.dim32(0);
CAFFE_ENFORCE(N == b.dim32(0), "mismatch between Weight and Bias.");
- Y_names->Resize(M, top_n_);
- Y_scores->Resize(M, top_n_);
+ auto* Y_names = Output(0, {M, top_n_}, at::dtype<string>());
+ auto* Y_scores = Output(1, {M, top_n_}, at::dtype<float>());
if (bias_multiplier_.numel() != M) {
bias_multiplier_.Resize(M);
template <typename T, class Context>
bool HuffmanTreeHierarchyOp<T, Context>::RunOnDevice() {
const auto& Y = Input(0);
- auto treeOutput = Output(0);
+
CAFFE_ENFORCE_EQ(Y.dim(), 1, "Input labels must be a vector.");
const auto y_data = Y.template data<T>();
- treeOutput->Resize(1);
+ auto treeOutput = Output(0, {1}, at::dtype<string>());
std::vector<int> labelCounts;
labelCounts.resize(num_classes_, 0);
for (int i = 0; i < Y.dim32(0); ++i) {
bool HeatmapMaxKeypointOp<float, CPUContext>::RunOnDevice() {
const auto& heatmaps_in = Input(0);
const auto& bboxes_in = Input(1);
- auto* keypoints_out = Output(0);
CAFFE_ENFORCE_EQ(heatmaps_in.dim(), 4);
const int N = heatmaps_in.dim32(0);
} /* otherwise not initialized */
// Resize and wrap outputs in Eigen
- keypoints_out->Resize(N, 4, keypoint_count);
+ auto* keypoints_out = Output(0, {N, 4, keypoint_count}, at::dtype<float>());
Eigen::Map<ERArrXXf> keypoints(
keypoints_out->mutable_data<float>(), N, 4 * keypoint_count);
bool RunOnDevice() override {
auto& X = Input(0);
- auto* Y = Output(0);
+
CAFFE_ENFORCE(4 == X.dim());
int N = 0, C = 0, H = 0, W = 0;
switch (order_) {
case StorageOrder::NCHW: {
- Y->Resize(
- std::vector<int64_t>{N, C * kernel_h_ * kernel_w_, out_h, out_w});
+ auto* Y = Output(
+ 0,
+ std::vector<int64_t>{N, C * kernel_h_ * kernel_w_, out_h, out_w},
+ at::dtype<T>());
const size_t dx = X.numel() / N;
const size_t dy = Y->numel() / N;
}
}; break;
case StorageOrder::NHWC: {
- Y->Resize(
- std::vector<int64_t>{N, out_h, out_w, kernel_h_ * kernel_w_ * C});
+ auto* Y = Output(
+ 0,
+ std::vector<int64_t>{N, out_h, out_w, kernel_h_ * kernel_w_ * C},
+ at::dtype<T>());
const size_t dx = X.numel() / N;
const size_t dy = Y->numel() / N;
// Reads the IndexBase held (via unique_ptr) in input 0 and stores the value
// returned by its Size() into output 0. Always returns true.
// Diff note: the removed Output(0) + Resize(...) pair is replaced by a single
// Output(idx, dims, dtype) call.
bool RunOnDevice() override {
auto& base = OperatorBase::Input<std::unique_ptr<IndexBase>>(0);
- auto* out = Output(0);
- out->Resize(std::vector<int64_t>{});
+
// Empty dims vector: the output is requested with no dimensions; exactly one
// int64_tValue is written through its data pointer below.
+ auto* out = Output(0, std::vector<int64_t>{}, at::dtype<int64_tValue>());
*out->template mutable_data<int64_tValue>() = base->Size();
return true;
}
template <>
bool IntegralImageOp<float, CPUContext>::RunOnDevice() {
const auto& X = Input(0);
- auto* Y = Output(0);
+
CAFFE_ENFORCE_EQ(X.dim(), 4, "Only supports 4D tensors for the momement");
vector<int64_t> out_shape(X.sizes().vec());
out_shape[2] += 1; // H + 1 output size
out_shape[3] += 1; // W + 1 output size
- Y->Resize(out_shape);
+ auto* Y = Output(0, out_shape, at::dtype<float>());
const int ind = X.dim32(0);
const int chans = X.dim32(1);
const int rows_in = X.dim32(2);
// Stores a single bool in output 0: true when input 0 has zero elements
// (numel() == 0), false otherwise. Always returns true.
// Diff note: the removed Output(0) + Resize(...) pair is replaced by a single
// Output(idx, dims, dtype) call.
bool RunOnDevice() override {
auto& input = Input(0);
- auto* output = Output(0);
- output->Resize(std::vector<int64_t>{});
+
// Empty dims vector: the output is requested with no dimensions and bool dtype.
+ auto* output = Output(0, std::vector<int64_t>{}, at::dtype<bool>());
*output->template mutable_data<bool>() = (input.numel() == 0);
return true;
}
counts[k]++;
}
for (int k = 0; k < categorical_limit_; k++) {
- auto* eid = Output(k);
- eid->Resize(counts[k]);
+ auto* eid = Output(k, {counts[k]}, at::dtype<int>());
eids[k] = eid->template mutable_data<int>();
counts[k] = 0;
}
// Prepares the three outputs (Y, mean, sig) for input X and delegates the
// computation to runLayerNorm<T>. Always returns true.
// Diff note: the removed lines are a temporary Resize + mutable_data
// workaround marked "TODO: change back"; the added lines restore the
// Output(idx, dims, dtype) form that had been commented out.
bool DoRunWithType() {
const auto& X = Input(0);
// Y is fetched without a shape here; presumably runLayerNorm sizes it -- TODO confirm.
auto* Y = Output(0);
- auto* mean = Output(1);
- auto* sig = Output(2);
const int canonical_axis = X.canonical_axis_index(axis_);
// Shape of the moment outputs: X's dims before canonical_axis, followed by a
// single trailing dimension of size 1.
std::vector<int64_t> moments_dims(
X.dims().cbegin(), X.dims().cbegin() + canonical_axis);
moments_dims.push_back(1);
- mean->Resize(moments_dims);
- sig->Resize(moments_dims);
- mean->template mutable_data<T>();
- sig->template mutable_data<T>();
- // TODO: change back
- //auto* mean = Output(1, moments_dims, at::dtype<T>());
- //auto* sig = Output(2, moments_dims, at::dtype<T>());
// Both moment outputs are requested with the same shape and element type T.
+ auto* mean = Output(1, moments_dims, at::dtype<T>());
+ auto* sig = Output(2, moments_dims, at::dtype<T>());
runLayerNorm<T>(X, Y, mean, sig, canonical_axis, epsilon_, &scale_, &bias_, &context_);
return true;
}
"`n_split` must contain a positive value for defined behavior.");
const auto M = L.numel();
- auto* Y = Output(0);
- Y->Resize(M * n_split_);
+ auto* Y = Output(0, {M * n_split_}, at::dtype<int32_t>());
const int32_t* Ldata = L.template data<int32_t>();
int32_t* Ydata = Y->template mutable_data<int32_t>();
bool DoRunWithType() {
auto& data = Input(DATA);
auto& lengths = Input(LENGTHS);
- auto* output = Output(0);
CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTHS must be 1-D");
CAFFE_ENFORCE_GE(data.dim(), 1, "DATA should be at least 1-D");
auto shape = data.sizes().vec();
shape[0] = lengths_size * target_length_;
- output->Resize(shape);
+ auto* output = Output(0, shape, at::dtype<T>());
auto block_size = data.size_from_dim(1);
auto src_data = data.template data<T>();
const auto& data = Input(DATA);
const auto& indices = Input(INDICES);
const auto& lengths = Input(LENGTHS);
- auto* output = Output(0);
CAFFE_ENFORCE_EQ(indices.dim(), 1, "INDICES must be a vector");
CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTHS must be a vector");
// Subtract 8 from the #columns of data for the 4 bytes for scale and 4
// bytes for bias that we use in the fused representation (per row).
const std::vector<int64_t> shape = {lengths.size(0), data.size(1) - 8};
- output->Resize(shape);
+ auto* output = Output(0, shape, at::dtype<float>());
Fused8BitRowwiseEmbeddingLookup(
/*block_size=*/output->size(1),
const int64_t M = lengthsInput.size(0);
const int64_t indices_size = indicesInput.numel();
- auto* output = Output(0);
auto shape = dataInput.sizes().vec();
shape[0] = M;
- output->Resize(shape);
+ auto* output = Output(0, shape, at::dtype<T>());
T* out_data = output->template mutable_data<T>();
const InputType* in_data = dataInput.template data<InputType>();
bool DoRunWithType() {
auto& dataInput = Input(DATA);
auto& lengthsInput = Input(LENGTHS);
- auto* output = Output(0);
+
auto* scale_bias = Input(SCALE_BIAS).template data<float>();
CAFFE_ENFORCE_EQ(1, lengthsInput.dim(), "LENGTHS must be a vector");
const int64_t outputSize = lengthsInput.size(0);
const int* lengths = lengthsInput.template data<int>();
vector<int64_t> shape = dataInput.sizes().vec();
shape[0] = outputSize;
- output->Resize(shape);
+ auto* output = Output(0, shape, at::dtype<OutDataT>());
const float* w = nullptr;
if (USE_WEIGHTS) {
w = Input(WEIGHTS).template data<float>();
bool RunOnDevice() override {
auto& input = Input(DATA_FLOAT);
auto* output = Output(DATA_UINT8);
- auto* scale_bias = Output(SCALE_BIAS);
+
auto* input_data = input.template data<float>();
output->ResizeLike(input);
vector<int64_t> scale_bias_dims = {input.size(0), 2};
- scale_bias->Resize(scale_bias_dims);
+ auto* scale_bias = Output(SCALE_BIAS, scale_bias_dims, at::dtype<float>());
auto* output_data = output->template mutable_data<uint8_t>();
float* scale_bias_data = scale_bias->template mutable_data<float>();
size_t n_blocks = input.size(0);
int N = Y.dim32(0);
const T* X_data = X.template data<T>();
const int* input_len = Y.template data<int>();
- auto* output_topk_values = Output(TOPK_VALUES_OUT);
- auto* output_topk_indices = Output(TOPK_INDICES_OUT);
-
- output_topk_values->Resize(N * k_);
- output_topk_indices->Resize(N * k_);
- std::vector<int> output_dims = std::vector<int>({N, k_});
- output_topk_values->Reshape(output_dims);
- output_topk_indices->Reshape(output_dims);
+
+ auto output_dims = std::vector<int64_t>({N, k_});
+ auto* output_topk_values = Output(TOPK_VALUES_OUT, output_dims, at::dtype<T>());
+ auto* output_topk_indices =
+ Output(TOPK_INDICES_OUT, output_dims, at::dtype<int>());
T* output_topk_values_data = output_topk_values->template mutable_data<T>();
int* output_topk_indices_data =
output_topk_indices->template mutable_data<int>();
auto& input_topk = Input(DER_TOPK_IN);
CAFFE_ENFORCE_EQ(
input_topk.numel(), N * k_, "input_topk shape is not correct");
- auto* X_out = Output(DER_X_OUT);
const int* input_len_data = input_len.template data<int>();
const int* input_indices_data = input_indices.template data<int>();
for (int i = 0; i < N; i++) {
num_indices += input_len_data[i];
}
- X_out->Resize(num_indices);
- std::vector<int> output_dims = std::vector<int>({num_indices});
- X_out->Reshape(output_dims);
+ auto* X_out = Output(DER_X_OUT, {num_indices}, at::dtype<T>());
T* X_out_data = X_out->template mutable_data<T>();
math::Set<T, Context>(num_indices, 0.0, X_out_data, &context_);
auto& y = Input(PRED);
auto& r = Input(REL);
auto& sid = Input(SESSION_LENS);
- auto* loss = Output(LOSS);
+
auto* dy = Output(DPRED);
const auto* session_lengths = sid.template data<int>();
CAFFE_ENFORCE(y.dim() == 1);
CAFFE_ENFORCE(y.numel() == r.numel());
dy->Resize(y.numel());
- loss->Resize(sid.numel());
+ auto* loss = Output(LOSS, {sid.numel()}, at::dtype<float>());
auto loss_vec = loss->template mutable_data<float>();
int start_id = 0;
for (int i = 0; i < sid.numel(); i++) {
auto& sids = Input(SESSION_LENS);
auto& dy_cache = Input(DY_CACHE);
auto& dLoss = Input(DLOSS);
- auto* dy = Output(DY);
+
CAFFE_ENFORCE(y.dim() == 1);
CAFFE_ENFORCE(dy_cache.dim() == 1);
CAFFE_ENFORCE(dy_cache.numel() > 0);
ConstEigenVectorArrayMap<float> dy_cache_vec(
dy_cache.template data<float>(), dy_cache.numel());
- dy->Resize(dy_cache.numel());
+ auto* dy = Output(DY, {dy_cache.numel()}, at::dtype<float>());
EigenVectorArrayMap<float> dy_vec(
dy->template mutable_data<float>(), dy->numel());
auto multiplier = dLoss.template data<float>();