static DLDataType getDLDataType(const Tensor& t) {
DLDataType dtype;
dtype.lanes = 1;
- dtype.bits = t.dtype().itemsize() * 8;
+ dtype.bits = t.element_size() * 8;
switch (t.scalar_type()) {
case ScalarType::Byte:
dtype.code = DLDataTypeCode::kDLUInt;
return TypeID::Undefined;
}
-size_t UndefinedType::elementSizeInBytes() const {
- AT_ERROR("elementSizeInBytes not defined for UndefinedType");
-}
-
Type & UndefinedType::toBackend(Backend b) const {
if (b == Backend::Undefined) {
return TypeDefault::toBackend(b);
virtual Storage storageWithAllocator(int64_t size, Allocator* allocator) const override;
virtual std::unique_ptr<Generator> generator() const override;
virtual const char * toString() const override;
- virtual size_t elementSizeInBytes() const override;
virtual Type & toBackend(Backend b) const override;
virtual Type & toScalarType(ScalarType s) const override;
virtual TypeID ID() const override;
virtual Tensor unsafeTensorFromTH(void * th_pointer, bool retain) const = 0;
virtual Storage unsafeStorageFromTH(void * th_pointer, bool retain) const = 0;
virtual const char * toString() const = 0;
- virtual size_t elementSizeInBytes() const = 0;
virtual Type & toBackend(Backend b) const = 0;
virtual Type & toScalarType(ScalarType s) const = 0;
Type & toSparse() const {
AdvancedIndex::AdvancedIndex(const Tensor& src, TensorList indices_list)
{
- int64_t element_size_bytes = src.type().elementSizeInBytes();
+ int64_t element_size_bytes = src.element_size();
int64_t dims_before = 0, dims_after = 0, dims_indexed = 0;
IntArrayRef replacement_shape;
for (size_t dim = 0; dim < indices_list.size(); dim++) {
auto& op = operands_[i];
if (!op.tensor.defined()) {
AT_ASSERTM(op.type, "no type for operand", i);
- int element_size = op.type->elementSizeInBytes();
+ int element_size = op.type->typeMeta().itemsize();
op.stride_bytes = compatible_stride(element_size);
auto tensor_shape = invert_perm(shape_);
int ndim = shape.size();
auto original_shape = tensor.sizes();
auto original_stride = tensor.strides();
- auto element_size_in_bytes = tensor.type().elementSizeInBytes();
+ auto element_size_in_bytes = tensor.element_size();
auto stride = DimVector(ndim, 0);
auto offset = ndim - original_shape.size();
}
ScalarType dtype(int arg=0) const { return type(arg).scalarType(); }
DeviceType device_type(int arg=0) const { return type(arg).device_type(); }
- int64_t element_size(int arg) const { return type(arg).elementSizeInBytes(); }
+ int64_t element_size(int arg) const { return type(arg).typeMeta().itemsize(); }
bool is_scalar(int arg) const;
bool is_cpu_scalar(int arg) const;
AT_CUDA_CHECK(cudaMemcpyAsync(
dst_contig.data_ptr(),
src_contig.data_ptr(),
- src.numel() * src.dtype().itemsize(),
+ src.numel() * src.element_size(),
cudaMemcpyHostToDevice,
stream));
AT_CUDA_CHECK(cudaStreamSynchronize(stream));
AT_CUDA_CHECK(cudaMemcpyAsync(
dst_contig.data_ptr(),
src_contig.data_ptr(),
- src.numel() * src.dtype().itemsize(),
+ src.numel() * src.element_size(),
cudaMemcpyDeviceToHost,
stream));
AT_CUDA_CHECK(cudaStreamSynchronize(stream));
// (see kRoundSmall and kRoundLarge in THCCachingAllocator.cpp), but we do
// need to check input tensor to make sure that it is not unaligned, e.g.,
// from a slicing.
- auto complex_size_bytes = 2 * input.type().elementSizeInBytes();
+ auto complex_size_bytes = 2 * input.element_size();
if (reinterpret_cast<std::uintptr_t>(input.data_ptr()) % complex_size_bytes != 0) {
input = input.clone();
input_was_cloned = true;
return ${TypeID};
}
-size_t ${Type}::elementSizeInBytes() const {
- return sizeof(${ScalarType});
-}
-
${type_derived_method_definitions}
}
virtual Tensor unsafeTensorFromTH(void * th_pointer, bool retain) const = 0;
virtual Storage unsafeStorageFromTH(void * th_pointer, bool retain) const = 0;
virtual const char * toString() const = 0;
- virtual size_t elementSizeInBytes() const = 0;
virtual Type & toBackend(Backend b) const = 0;
virtual Type & toScalarType(ScalarType s) const = 0;
Type & toSparse() const {
return ${TypeID};
}
-size_t ${Type}::elementSizeInBytes() const {
- return sizeof(${ScalarType});
-}
-
/* example
Tensor * ${Type}::add(Tensor & a, Tensor & b) {
std::cout << "add Tensor with backend ${Backend}\n";
virtual caffe2::TypeMeta typeMeta() const override;
virtual Backend backend() const override;
virtual const char * toString() const override;
- virtual size_t elementSizeInBytes() const override;
virtual TypeID ID() const override;
// example
return Backend::${Backend};
}
-size_t ${Type}::elementSizeInBytes() const {
- AT_ERROR("elementSizeInBytes is not implemented for ${Type}");
-}
-
${type_method_definitions}
} // namespace at
Device getDeviceFromPtr(void * data) const override;
std::unique_ptr<Generator> generator() const override;
virtual Backend backend() const override;
- virtual size_t elementSizeInBytes() const override;
${type_method_declarations}
};
caffe2::TypeMeta typeMeta() const override;
Backend backend() const override;
const char* toString() const override;
- size_t elementSizeInBytes() const override;
TypeID ID() const override;
Tensor empty(IntArrayRef size, const TensorOptions & options) const override {
return TypeID::CPUComplexFloat;
}
-size_t CPUComplexFloatType::elementSizeInBytes() const {
- return sizeof(float);
-}
-
REGISTER_COMPLEX_HOOKS(ComplexHooks);
} // namespace at
std::unique_ptr<at::Generator> generator() const override;
const char * toString() const override;
at::TypeID ID() const override;
- size_t elementSizeInBytes() const override;
at::Type & toBackend(at::Backend b) const override;
at::Type & toScalarType(at::ScalarType s) const override;
Storage unsafeStorageFromTH(void * th_pointer, bool retain) const override;
const char * VariableType::toString() const {
return str.c_str();
}
-size_t VariableType::elementSizeInBytes() const {
- return baseType->elementSizeInBytes();
-}
Type & VariableType::toBackend(Backend b) const {
return *getVariableTypeFromBaseType(baseType->toBackend(b));
}
AT_ASSERT(t.is_contiguous());
tensor_proto->set_raw_data(std::string(
static_cast<char*>(t.data_ptr()),
- t.type().elementSizeInBytes() * t.numel()));
+ t.element_size() * t.numel()));
}
}
tensor_proto->set_requires_grad(tensor.requires_grad());
uint64_t record_size =
- tensor.type().elementSizeInBytes() * tensor.storage().size();
+ tensor.element_size() * tensor.storage().size();
auto* key = tensor.storage().unsafeGetStorageImpl();
auto storage_it = storageMap.find(key);
/* stride = */ {1})
.cpu();
AT_ASSERT(
- storage_tensor.type().elementSizeInBytes() *
+ storage_tensor.element_size() *
storage_tensor.storage().size() ==
record_size);
}
python_serialized_export_map;
for (auto& kv : export_map) {
auto t = kv.second;
- size_t copy_bytes = t.type().elementSizeInBytes() * t.numel();
+ size_t copy_bytes = t.element_size() * t.numel();
// TODO: this is an unnecessary copy. In theory we can directly
// return the map from identifier to Tensor, but we need some API
// in Python to get raw `bytes` containing the raw tensor data.
at::empty(sizes, at::initialTensorOptions().dtype(initial_scalar_type)));
recursiveStore((char*)tensor.data_ptr(), sizes, tensor.strides(), 0,
- tensor.type().elementSizeInBytes(), data);
+ tensor.element_size(), data);
at::ScalarType scalar_type = dtype.isNone() ? tensor.scalar_type() : dtype.toScalarType();
c10::Device dev = device.isNone() ? tensor.device() : device.toDevice();
StridedData(const Tensor & tensor)
: data(tensor.data_ptr())
, strides(tensor.strides())
- , elementSize(tensor.type().elementSizeInBytes()) {}
+ , elementSize(tensor.element_size()) {}
void* data;
IntArrayRef strides;
if (type.is_sparse()) {
const auto& indices = tensor._indices();
const auto& values = tensor._values();
- tensor_size = indices.numel() * indices.type().elementSizeInBytes() +
- values.numel() * indices.type().elementSizeInBytes();
+ tensor_size = indices.numel() * indices.element_size() +
+ values.numel() * values.element_size();
} else {
- tensor_size = tensor.numel() * type.elementSizeInBytes();
+ tensor_size = tensor.numel() * tensor.element_size();
}
auto& type_group = groups[type.ID()];
auto tensor = autograd::make_variable(at::empty(sizes, at::initialTensorOptions().dtype(scalar_type)), /*requires_grad=*/false);
recursive_store(
(char*)tensor.data_ptr(), tensor.sizes(), tensor.strides(), 0,
- scalar_type, tensor.type().elementSizeInBytes(), data);
+ scalar_type, tensor.element_size(), data);
auto device = device_opt.has_value() ? *device_opt : at::Device(torch::getDeviceType(type));
AutoNoGIL no_gil;
maybe_initialize_cuda(device);
auto sizes = to_numpy_shape(tensor.sizes());
auto strides = to_numpy_shape(tensor.strides());
// NumPy strides use bytes. Torch strides use element counts.
- auto element_size_in_bytes = tensor.type().elementSizeInBytes();
+ auto element_size_in_bytes = tensor.element_size();
for (auto& stride : strides) {
stride *= element_size_in_bytes;
}
"allGather got input and output on different devices");
}
}
- uint64_t tensor_bytes = input.type().elementSizeInBytes() * input.numel();
+ uint64_t tensor_bytes = input.element_size() * input.numel();
uint64_t all_tensor_bytes = tensor_bytes * output.size();
auto ret = _cache->getAlgorithm<CollectiveType::ALL_GATHER, T>(
group_id,
at::Tensor& t,
THDReduceOp operation,
THDGroup group_id) {
- uint64_t tensor_bytes = t.type().elementSizeInBytes() * t.numel();
+ uint64_t tensor_bytes = t.element_size() * t.numel();
auto ret = _cache->getAlgorithm<CollectiveType::ALL_REDUCE, T>(
group_id,
_groups.at(group_id),
at::Tensor& data,
rank_type src_rank,
THDGroup group_id) {
- uint64_t tensor_bytes = data.type().elementSizeInBytes() * data.numel();
+ uint64_t tensor_bytes = data.element_size() * data.numel();
auto ret = _cache->getAlgorithm<CollectiveType::BROADCAST, T>(
group_id,
_groups.at(group_id),
memcpy(
output[group_rank].data_ptr(),
input.data_ptr(),
- input.type().elementSizeInBytes() * input.numel());
+ input.element_size() * input.numel());
auto j = group_rank, jnext = left;
for (rank_type i = 0; i < group.size(); ++i) {
memcpy(
output.at(i).data_ptr(),
input.data_ptr(),
- input.numel() * input.type().elementSizeInBytes());
+ input.numel() * input.element_size());
}
}
}
memcpy(
output.data_ptr(),
input.at(i).data_ptr(),
- output.numel() * output.type().elementSizeInBytes());
+ output.numel() * output.element_size());
}
}
}
if (!exists)
return;
- uint64_t tensor_bytes = data.type().elementSizeInBytes() * data.numel();
+ uint64_t tensor_bytes = data.element_size() * data.numel();
auto tmp_tensor = data.clone();
auto pof2 = pow2(group.size());
std::memcpy(
data.data_ptr(),
result_tensor.data_ptr(),
- data.type().elementSizeInBytes() * data.numel());
+ data.element_size() * data.numel());
}
void DataChannelTCP::broadcast(
throw std::logic_error("tensor to send is not contiguous");
// send size of tensor data in bytes
- uint64_t tensor_bytes = data.type().elementSizeInBytes() * data.numel();
+ uint64_t tensor_bytes = data.element_size() * data.numel();
send_bytes<uint64_t>(process_dst.socket, &tensor_bytes, 1, true);
// send data (bytes)
recv_bytes<uint64_t>(process_src.socket, &tensor_bytes, 1);
uint64_t actual_tensor_bytes =
- data.type().elementSizeInBytes() * data.numel();
+ data.element_size() * data.numel();
if (actual_tensor_bytes == tensor_bytes) {
recv_bytes<std::uint8_t>(
process_src.socket,
const at::Tensor& tensor1,
const at::Tensor& tensor2,
std::string prefix = std::string()) {
- bool equal = tensor1.type().elementSizeInBytes() ==
- tensor2.type().elementSizeInBytes() &&
+ bool equal = tensor1.element_size() == tensor2.element_size() &&
tensor1.numel() == tensor2.numel() && tensor1.type() == tensor2.type();
if (!prefix.empty())
}
static void memcpy_input(value_type& info, at::Tensor& t) {
- uint64_t tensor_bytes = t.type().elementSizeInBytes() * t.numel();
+ uint64_t tensor_bytes = t.element_size() * t.numel();
auto t_dev = getDeviceType(t);
auto input_buffer = GlooCache::input_buffer(info).get();
}
static void memcpy_output(value_type& info, at::Tensor& t) {
- uint64_t tensor_bytes = t.type().elementSizeInBytes() * t.numel();
+ uint64_t tensor_bytes = t.element_size() * t.numel();
auto t_dev = getDeviceType(t);
auto output_buffer = GlooCache::output_buffer(info).get();
auto& tensor = checkSingleTensor(tensors);
auto utag = checkTag(tag);
auto ptr = tensor.data_ptr();
- auto size = tensor.numel() * tensor.type().elementSizeInBytes();
+ auto size = tensor.numel() * tensor.element_size();
// Construct unbound buffer.
auto& context = contexts_[0];
auto& tensor = checkSingleTensor(tensors);
auto utag = checkTag(tag);
auto ptr = tensor.data_ptr();
- auto size = tensor.numel() * tensor.type().elementSizeInBytes();
+ auto size = tensor.numel() * tensor.element_size();
// Construct unbound buffer.
auto& context = contexts_[0];
auto& tensor = checkSingleTensor(tensors);
auto utag = checkTag(tag);
auto ptr = tensor.data_ptr();
- auto size = tensor.numel() * tensor.type().elementSizeInBytes();
+ auto size = tensor.numel() * tensor.element_size();
// Construct unbound buffer.
auto& context = contexts_[0];
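
Note: below is a minimal standalone sketch, not part of the change above, illustrating the equivalence the hunks rely on: at::Tensor::element_size() reports the same per-element byte width that Type::elementSizeInBytes() used to (and that TypeMeta::itemsize() reports), so byte counts such as numel() * element_size() are unchanged. It assumes only public ATen calls that already appear in the diff (element_size(), numel(), dtype().itemsize()).

// A minimal sketch: check that the per-element byte width used throughout
// the diff is the same regardless of which accessor reports it, and compute
// a total byte count the way the migrated call sites do.
#include <ATen/ATen.h>
#include <iostream>

int main() {
  at::Tensor t = at::ones({2, 3}, at::kFloat);

  const auto elem = t.element_size();      // per-element size in bytes (4 for float)
  const auto item = t.dtype().itemsize();  // same value via the tensor's TypeMeta

  std::cout << "element_size = " << elem
            << ", itemsize = " << item
            << ", total bytes = " << t.numel() * elem << std::endl;
  return 0;
}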