set(USE_OPENMP none)
# Whether to use contrib.random in runtime
-set(USE_RANDOM OFF)
+set(USE_RANDOM ON)
# Whether to use NNPack
set(USE_NNPACK OFF)
* \brief mt19937 random engine
*/
#include <dmlc/logging.h>
+#include <tvm/runtime/device_api.h>
+#include <tvm/runtime/ndarray.h>
#include <algorithm>
#include <ctime>
#include <random>
+#include "../3rdparty/compiler-rt/builtin_fp16.h"
+
namespace tvm {
namespace contrib {
}
}
+ // Fill `data` with uniformly distributed random values (see FillData for the
+ // per-dtype storage rules). Works for any device: non-CPU tensors are filled
+ // through a temporary CPU NDArray and then copied to the target device.
+ void RandomFill(DLTensor* data) {
+ // Total element count = product of all shape extents.
+ int64_t size = 1;
+ for (int i = 0; i < data->ndim; ++i) {
+ size *= data->shape[i];
+ }
+
+ if (data->ctx.device_type == kDLCPU) {
+ // CPU-resident tensor: fill in place.
+ FillData(data, size);
+ } else {
+ // Device tensor: generate on CPU, then copy into the target context.
+ runtime::NDArray local = runtime::NDArray::Empty(
+ std::vector<int64_t>{data->shape, data->shape + data->ndim}, data->dtype, {kDLCPU, 0});
+ // NOTE(review): each ToDLPack() call appears to create a DLManagedTensor
+ // whose deleter is never invoked here -- confirm this does not leak.
+ FillData(&local.ToDLPack()->dl_tensor, size);
+ runtime::NDArray::CopyFromTo(&local.ToDLPack()->dl_tensor, data);
+ }
+ }
+
+ private:
+ // Fill `size` elements of the CPU-resident `tensor` with random values.
+ // The dtype's bit width (not its type code) selects how each drawn value
+ // is stored; unsupported widths abort via LOG(FATAL).
+ void FillData(DLTensor* tensor, int64_t size) {
+ // Draw from [1.0, 10.0) rather than [0.0, 1.0) so that narrow quantized
+ // dtypes (uint8 / int8) never truncate to zero -- callers rely on every
+ // element being nonzero.
+ std::uniform_real_distribution<> dist(1.0, 10.0);
+ // A float-valued distribution serves both float and integer tensor types;
+ // the value is converted (implicitly or explicitly) on store.
+ if (tensor->dtype.bits == 1) {
+ std::generate_n(static_cast<bool*>(tensor->data), size, [&]() { return dist(rnd_engine_); });
+ } else if (tensor->dtype.bits == 8) {
+ std::generate_n(static_cast<uint8_t*>(tensor->data), size,
+ [&]() { return dist(rnd_engine_); });
+ } else if (tensor->dtype.bits == 16) {
+ // 16-bit: store the IEEE half-precision bit pattern produced by the
+ // compiler-rt float->half truncation helper.
+ // NOTE(review): for int16/uint16 tensors this writes float16 bit
+ // patterns rather than integer values -- confirm that is intended
+ // (the tests only check elements are nonzero).
+ std::generate_n(static_cast<uint16_t*>(tensor->data), size, [&]() {
+ return __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(
+ static_cast<float>(dist(rnd_engine_)));
+ });
+ } else if (tensor->dtype.bits == 32) {
+ std::generate_n(static_cast<float*>(tensor->data), size, [&]() { return dist(rnd_engine_); });
+ } else if (tensor->dtype.bits == 64) {
+ std::generate_n(static_cast<double*>(tensor->data), size,
+ [&]() { return dist(rnd_engine_); });
+ } else {
+ LOG(FATAL) << "Doesn't support dtype code " << tensor->dtype.code << " dtype bits "
+ << tensor->dtype.bits;
+ }
+ }
+
private:
std::mt19937 rnd_engine_;
unsigned rseed_;
entry->random_engine.SampleNormal(out, loc, scale);
});
+// Packed-function entry point: fills the DLTensor passed as args[0] with
+// random data using the thread-local random engine (mirrors the other
+// tvm.contrib.random.* registrations above).
+TVM_REGISTER_GLOBAL("tvm.contrib.random.random_fill").set_body([](TVMArgs args, TVMRetValue* ret) {
+ RandomThreadLocalEntry* entry = RandomThreadLocalEntry::ThreadLocal();
+ DLTensor* out = args[0];
+ entry->random_engine.RandomFill(out);
+});
+
} // namespace contrib
} // namespace tvm
from tvm import te
import numpy as np
from tvm.contrib import random
+from tvm import rpc
+
+def enabled_ctx_list():
+ # Enumerate the candidate (name, context) pairs TVM knows about, then keep
+ # only the contexts that actually exist on this machine so the tests below
+ # can run anywhere.
+ ctx_list = [('cpu', tvm.cpu(0)),
+ ('gpu', tvm.gpu(0)),
+ ('cl', tvm.opencl(0)),
+ ('metal', tvm.metal(0)),
+ ('rocm', tvm.rocm(0)),
+ ('vulkan', tvm.vulkan(0)),
+ ('vpi', tvm.vpi(0))]
+ for k, v in ctx_list:
+ # Sanity check: the string name round-trips to the same context object.
+ assert tvm.context(k, 0) == v
+ ctx_list = [x[1] for x in ctx_list if x[1].exist]
+ return ctx_list
+
+# Computed once at import time; shared by the tests below.
+ENABLED_CTX_LIST = enabled_ctx_list()
def test_randint():
m = 1024
assert abs(np.std(na) - 4) < 1e-2
verify()
+def test_random_fill():
+ def test_local(ctx, dtype):
+ if not tvm.get_global_func("tvm.contrib.random.random_fill", True):
+ print("skip because extern function is not available")
+ return
+ np_ones = np.ones((512, 512), dtype=dtype)
+ value = tvm.nd.empty(np_ones.shape, np_ones.dtype, ctx)
+ random_fill = tvm.get_global_func("tvm.contrib.random.random_fill")
+ random_fill(value)
+
+ assert np.count_nonzero(value.asnumpy()) == 512 * 512
+
+ # make sure arithmentic doesn't overflow too
+ np_values = value.asnumpy()
+ assert np.isfinite(np_values * np_values + np_values).any()
+
+ def test_rpc(dtype):
+ if not tvm.get_global_func("tvm.contrib.random.random_fill", True):
+ print("skip because extern function is not available")
+ return
+ if not tvm.runtime.enabled("rpc") or not tvm.runtime.enabled("llvm"):
+ return
+ np_ones = np.ones((512, 512), dtype=dtype)
+ server = rpc.Server("localhost")
+ remote = rpc.connect(server.host, server.port)
+ value = tvm.nd.empty(np_ones.shape, np_ones.dtype, remote.cpu())
+ random_fill = remote.get_function("tvm.contrib.random.random_fill")
+ random_fill(value)
+
+ assert np.count_nonzero(value.asnumpy()) == 512 * 512
+
+ # make sure arithmentic doesn't overflow too
+ np_values = value.asnumpy()
+ assert np.isfinite(np_values * np_values + np_values).any()
+
+ for dtype in ["bool", "int8", "uint8", "int16", "uint16", "int32", "int32",
+ "int64", "uint64", "float16", "float32", "float64"]:
+ for ctx in ENABLED_CTX_LIST:
+ test_local(ctx, dtype)
+ test_rpc(dtype)
if __name__ == "__main__":
test_randint()
test_uniform()
test_normal()
+ test_random_fill()