static void relu_nnc(benchmark::State& state) {
auto N = VarHandle("N", kInt);
- Placeholder A("A", kFloat, {N});
+ BufHandle A("A", {N}, kFloat);
auto clamp = 0;
torch::jit::tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i){
auto A_elem = [&]() {
static void log_nnc_sleef(benchmark::State& state) {
auto N = VarHandle("N", kInt);
- Placeholder A("A", kFloat, {N});
+ BufHandle A("A", {N}, kFloat);
torch::jit::tensorexpr::Tensor B =
Compute("B", {N}, [&](const VarHandle& i) {
return log(A.load(i));
static void log_nnc_fast(benchmark::State& state) {
auto N = VarHandle("N", kInt);
- Placeholder A("A", kFloat, {N});
+ BufHandle A("A", {N}, kFloat);
torch::jit::tensorexpr::Tensor B =
Compute("B", {N}, [&](const VarHandle& i) {
return fast_log(A.load(i));
static void log_nnc_vml(benchmark::State& state) {
auto N = VarHandle("N", kInt);
- Placeholder A("A", kFloat, {N});
+ BufHandle A("A", {N}, kFloat);
torch::jit::tensorexpr::Tensor B =
Compute("B", {N}, [&](const VarHandle& i) {
return log_vml(A.load(i));
static void logit_nnc_sleef(benchmark::State& state) {
auto N = VarHandle("N", kInt);
- Placeholder A("A", kFloat, {N});
+ BufHandle A("A", {N}, kFloat);
auto clamp = 1e-6f;
tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) {
auto A_elem = [&]() {
static void logit_nnc_fast(benchmark::State& state) {
auto N = VarHandle("N", kInt);
- Placeholder A("A", kFloat, {N});
+ BufHandle A("A", {N}, kFloat);
auto clamp = 1e-6f;
tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) {
auto A_elem = [&]() {
static void logit_nnc_vml(benchmark::State& state) {
auto N = VarHandle("N", kInt);
- Placeholder A("A", kFloat, {N});
+ BufHandle A("A", {N}, kFloat);
auto clamp = 1e-6f;
tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) {
auto A_elem = [&]() {
static void tanh_nnc_fast(benchmark::State& state) {
auto N = VarHandle("N", kInt);
- Placeholder A("A", kFloat, {N});
+ BufHandle A("A", {N}, kFloat);
torch::jit::tensorexpr::Tensor B =
Compute("B", {N}, [&](const VarHandle& i) {
return fast_tanh(A.load(i));
BENCHMARK_DEFINE_F(BatchNorm, NNC)(benchmark::State& state) {
- Placeholder input("input", kFloat, {N_, C_, H_, W_});
- Placeholder weight("weight", kFloat, {C_});
- Placeholder bias("bias", kFloat, {C_});
- Placeholder mean("mean", kFloat, {C_});
- Placeholder var("var", kFloat, {C_});
+ BufHandle input("input", {N_, C_, H_, W_}, kFloat);
+ BufHandle weight("weight", {C_}, kFloat);
+ BufHandle bias("bias", {C_}, kFloat);
+ BufHandle mean("mean", {C_}, kFloat);
+ BufHandle var("var", {C_}, kFloat);
VarHandle eps("eps", kFloat);
using axis = const VarHandle&;
BENCHMARK_DEFINE_F(BatchNorm, NNCRelu)(benchmark::State& state) {
- Placeholder input("input", kFloat, {N_, C_, H_, W_});
- Placeholder weight("weight", kFloat, {C_});
- Placeholder bias("bias", kFloat, {C_});
- Placeholder mean("mean", kFloat, {C_});
- Placeholder var("var", kFloat, {C_});
+ BufHandle input("input", {N_, C_, H_, W_}, kFloat);
+ BufHandle weight("weight", {C_}, kFloat);
+ BufHandle bias("bias", {C_}, kFloat);
+ BufHandle mean("mean", {C_}, kFloat);
+ BufHandle var("var", {C_}, kFloat);
VarHandle eps("eps", kFloat);
using axis = const VarHandle&;
for (auto _ : state) {
constexpr int N = 512;
te::VarHandle n("n", te::kInt);
- te::Placeholder A(te::BufHandle("A", {N}, te::kFloat));
+ te::BufHandle A("A", {N}, te::kFloat);
te::Tensor relu = te::Compute("relu", {{n, "n"}}, [&](const te::VarHandle& i) {
return te::Max::make(A.load(i), 0.f, false);
});
static void BM_CompileSwishLLVMOnly(benchmark::State& state) {
constexpr int N = 512;
te::VarHandle n("n", te::kInt);
- te::Placeholder A(te::BufHandle("A", {N}, te::kFloat));
+ te::BufHandle A("A", {N}, te::kFloat);
te::Tensor relu = te::Compute("relu", {{n, "n"}}, [&](const te::VarHandle& i) {
return te::Max::make(A.load(i), 0.f, false);
});
size_t num_inputs = inputs_.size();
size_t num_dims = 2;
- std::vector<Placeholder> inputs;
+ std::vector<BufHandle> inputs;
for (size_t i = 0; i < num_inputs; ++i) {
- inputs.emplace_back(Placeholder(
+ inputs.emplace_back(BufHandle(
"input" + std::to_string(i),
- kFloat,
- {input_sizes_[i][0], input_sizes_[i][1]}));
+ {input_sizes_[i][0], input_sizes_[i][1]},
+ kFloat));
}
Tensor output = Compute(
{alloc<IntImm>(output_size_[0]), alloc<IntImm>(output_size_[1])}),
kFloat);
- std::vector<Placeholder> inputs;
+ std::vector<BufHandle> inputs;
std::vector<StmtPtr> for_stmts(num_inputs);
int cumulative_input_sizes = 0;
for (size_t i = 0; i < num_inputs; ++i) {
- inputs.emplace_back(Placeholder(
+ inputs.emplace_back(BufHandle(
"input" + std::to_string(i),
- kFloat,
- {input_sizes_[i][0], input_sizes_[i][1]}));
+ {input_sizes_[i][0], input_sizes_[i][1]},
+ kFloat));
std::vector<VarPtr> for_vars(num_inputs);
for (size_t d = 0; d < num_dims; ++d) {
for_vars[d] =
{for_vars[0],
alloc<Add>(for_vars[1], alloc<IntImm>(cumulative_input_sizes))}),
alloc<Load>(
- inputs[i].data(),
+ inputs[i].node(),
std::vector<ExprPtr>({for_vars[0], for_vars[1]})));
auto for_st = alloc<For>(
for_vars[0],
BENCHMARK_DEFINE_F(Gemm, TensorExprNoopt)(benchmark::State& state) {
- te::Placeholder AP(te::BufHandle("A", {M, K}, te::kFloat));
- te::Placeholder BP(te::BufHandle("B", {K, N}, te::kFloat));
+ te::BufHandle AP("A", {M, K}, te::kFloat);
+ te::BufHandle BP("B", {K, N}, te::kFloat);
te::Tensor CT = te::Reduce(
"gemm",
{{M, "M"}, {N, "N"}},
BENCHMARK_DEFINE_F(Gemm, TensorExprTile32x32)(benchmark::State& state) {
- te::Placeholder AP(te::BufHandle("A", {M, K}, te::kFloat));
- te::Placeholder BP(te::BufHandle("B", {K, N}, te::kFloat));
+ te::BufHandle AP("A", {M, K}, te::kFloat);
+ te::BufHandle BP("B", {K, N}, te::kFloat);
te::Tensor CT = te::Reduce(
"gemm",
{{M, "M"}, {N, "N"}},
BENCHMARK_DEFINE_F(Gemm, TensorExprTile4x16)(benchmark::State& state) {
- te::Placeholder AP(te::BufHandle("A", {M, K}, te::kFloat));
- te::Placeholder BP(te::BufHandle("B", {K, N}, te::kFloat));
+ te::BufHandle AP("A", {M, K}, te::kFloat);
+ te::BufHandle BP("B", {K, N}, te::kFloat);
te::Tensor CT = te::Reduce(
"gemm",
{{M, "M"}, {N, "N"}},
BENCHMARK_DEFINE_F(Gemm, TensorExprTile4x16VecUnroll)(benchmark::State& state) {
- te::Placeholder AP(te::BufHandle("A", {M, K}, te::kFloat));
- te::Placeholder BP(te::BufHandle("B", {K, N}, te::kFloat));
+ te::BufHandle AP("A", {M, K}, te::kFloat);
+ te::BufHandle BP("B", {K, N}, te::kFloat);
te::Tensor CT = te::Reduce(
"gemm",
{{M, "M"}, {N, "N"}},
BENCHMARK_DEFINE_F(Gemm, TensorExprTile4x16Cache)(benchmark::State& state) {
- te::Placeholder AP(te::BufHandle("A", {M, K}, te::kFloat));
- te::Placeholder BP(te::BufHandle("B", {K, N}, te::kFloat));
+ te::BufHandle AP("A", {M, K}, te::kFloat);
+ te::BufHandle BP("B", {K, N}, te::kFloat);
te::Tensor CT = te::Reduce(
"gemm",
{{M, "M"}, {N, "N"}},
};
BENCHMARK_DEFINE_F(ParallelAdd, Simple)(benchmark::State& state) {
- Placeholder a_buf("a", kFloat, {M});
- Placeholder b_buf("b", kFloat, {M});
+ BufHandle a_buf("a", {M}, kFloat);
+ BufHandle b_buf("b", {M}, kFloat);
Tensor c_tensor = Compute(
"c", {{M, "m"}}, [&](const VarHandle& m) {
return a_buf.load(m) + b_buf.load(m);
int M = A.numel();
- te::Placeholder AP(te::BufHandle("A", {M}, te::kFloat));
+ te::BufHandle AP("A", {M}, te::kFloat);
te::Tensor BT = te::Reduce(
"reduce_full",
{{1, "N"}},
int M = A.numel();
- te::Placeholder AP(te::BufHandle("A", {M}, te::kFloat));
+ te::BufHandle AP("A", {M}, te::kFloat);
te::Tensor BT = te::Reduce(
"reduce_full",
{{1, "N"}},
int M = A.numel();
- te::Placeholder AP(te::BufHandle("A", {M}, te::kFloat));
+ te::BufHandle AP("A", {M}, te::kFloat);
te::Tensor BT = te::Reduce(
"reduce_full",
{{1, "N"}},
const int kChunkSize = 8;
TORCH_CHECK(M % kChunkSize == 0);
- te::Placeholder AP(te::BufHandle("A", {M}, te::kFloat));
+ te::BufHandle AP("A", {M}, te::kFloat);
te::Tensor BT = te::Reduce(
"reduce_full",
{},
const int M = A.numel();
const int kChunkSize = 8;
- te::Placeholder a("A", te::kFloat, {M});
- te::Tensor b = te::computeSum({a.handle(), te::IntList({0}), false}, at::kFloat);
+ te::BufHandle a("A", {M}, te::kFloat);
+ te::Tensor b = te::computeSum({a, te::IntList({0}), false}, at::kFloat);
te::LoopNest nest({b});
auto loops = nest.getLoopStmtsFor(b);
BENCHMARK_DEFINE_F(Reduce2DCol, OpSchedule)(benchmark::State& state) {
constexpr int kCacheSize = 1 << 12;
- te::Placeholder a("A", te::kFloat, {M, N});
- te::Tensor b = te::computeSum({a.handle(), te::IntList({0}), false}, at::kFloat);
+ te::BufHandle a("A", {M, N}, te::kFloat);
+ te::Tensor b = te::computeSum({a, te::IntList({0}), false}, at::kFloat);
te::LoopNest nest({b});
auto sch = state.range(2);
BENCHMARK_DEFINE_F(Reduce2DRow, OpSchedule)(benchmark::State& state) {
constexpr int kChunkSize = 8;
- te::Placeholder a("A", te::kFloat, {M, N});
- te::Tensor b = te::computeSum({a.handle(), te::IntList({1}), false}, at::kFloat);
+ te::BufHandle a("A", {M, N}, te::kFloat);
+ te::Tensor b = te::computeSum({a, te::IntList({1}), false}, at::kFloat);
te::LoopNest nest({b});
auto sch = state.range(2);
}
void runNNC(benchmark::State& state) {
- Placeholder input_ph(
- "input", kFloat, {input_size_int_[0], input_size_int_[1]});
+ BufHandle input_ph(
+ "input", {input_size_int_[0], input_size_int_[1]}, kFloat);
Tensor abs_result = Compute(
"aten_abs",
{{input_size_int_[0], "M"}, {input_size_int_[1], "N"}},
[&](const VarHandle& m, const VarHandle& n) {
return log1p(abs_result.load(m, n));
});
- Tensor sign_result = computeSign(
- {input_ph.handle()}, {input_size_int_[0], input_size_int_[1]});
+ Tensor sign_result =
+ computeSign({input_ph}, {input_size_int_[0], input_size_int_[1]});
Tensor output = Compute(
"aten_mul",
{{input_size_int_[0], "M"}, {input_size_int_[1], "N"}},
}
void runNNCLogVml(benchmark::State& state) {
- Placeholder input_ph(
- "input", kFloat, {input_size_int_[0], input_size_int_[1]});
+ BufHandle input_ph(
+ "input", {input_size_int_[0], input_size_int_[1]}, kFloat);
Tensor abs_result = Compute(
"aten_abs",
{{input_size_int_[0], "M"}, {input_size_int_[1], "N"}},
[&](const VarHandle& m, const VarHandle& n) {
return log_vml(abs_result.load(m, n) + ExprHandle(1));
});
- Tensor sign_result = computeSign(
- {input_ph.handle()}, {input_size_int_[0], input_size_int_[1]});
+ Tensor sign_result =
+ computeSign({input_ph}, {input_size_int_[0], input_size_int_[1]});
Tensor output = Compute(
"aten_mul",
{{input_size_int_[0], "M"}, {input_size_int_[1], "N"}},
TEST(Approx, log_vml) {
te::VarHandle N("N", te::kInt);
- te::Placeholder A("A", te::kFloat, {N});
+ te::BufHandle A("A", {N}, te::kFloat);
te::Tensor B = te::Compute(
"B", {N}, [&](const te::VarHandle& i) { return log_vml(A.load(i)); });
TEST(ATen, _cast_Float) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kInt));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, negInt) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kInt));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kInt));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kInt);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, negFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, addInt) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kInt));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kInt));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kInt));
- Placeholder d_buf(BufHandle("D", {ExprHandle(kTotalSize)}, kInt));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle d_buf("D", {ExprHandle(kTotalSize)}, kInt);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, addFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder d_buf(BufHandle("D", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle d_buf("D", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, subInt) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kInt));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kInt));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kInt));
- Placeholder d_buf(BufHandle("D", {ExprHandle(kTotalSize)}, kInt));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle d_buf("D", {ExprHandle(kTotalSize)}, kInt);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, subFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder d_buf(BufHandle("D", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle d_buf("D", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, lerp) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder d_buf(BufHandle("D", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle d_buf("D", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, addcmulInt) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kInt));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kInt));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kInt));
- Placeholder d_buf(BufHandle("D", {ExprHandle(kTotalSize)}, kInt));
- Placeholder e_buf(BufHandle("E", {ExprHandle(kTotalSize)}, kInt));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle d_buf("D", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle e_buf("E", {ExprHandle(kTotalSize)}, kInt);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, addcmulFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder d_buf(BufHandle("D", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder e_buf(BufHandle("E", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle d_buf("D", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle e_buf("E", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, mulInt) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kInt));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kInt));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kInt));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kInt);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, mulFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, divInt) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kInt));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kInt));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kInt));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kInt);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, divFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, maxInt) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kInt));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kInt));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kInt));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kInt);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, maxFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, minInt) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kInt));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kInt));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kInt));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kInt);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, minFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle c_buf("C", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
void __ubsan_ignore_float_divide_by_zero__ testATenreciprocal() {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, reluInt) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kInt));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kInt));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kInt);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kInt);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, reluFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, logFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, fastLogFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, fastTanhFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, fastSigmoidFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, log10Float) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, log2Float) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, expFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, erfFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, cosFloat) {
const int kTotalSize = 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
TEST(ATen, eqInt) {
constexpr int N = 128;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
+ BufHandle c("C", {N}, kInt);
std::vector<int> a_buffer(N, 1);
std::vector<int> b_buffer(N, 1);
std::vector<int> c_buffer(N, 0);
TEST(ATen, geInt) {
constexpr int N = 128;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
+ BufHandle c("C", {N}, kInt);
std::vector<int> a_buffer(N, 5);
std::vector<int> b_buffer(N, 5);
std::vector<int> c_buffer(N, 0);
TEST(ATen, gtInt) {
constexpr int N = 128;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
+ BufHandle c("C", {N}, kInt);
std::vector<int> a_buffer(N, 6);
std::vector<int> b_buffer(N, 3);
std::vector<int> c_buffer(N, 0);
TEST(ATen, leInt) {
constexpr int N = 128;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
+ BufHandle c("C", {N}, kInt);
std::vector<int> a_buffer(N, 5);
std::vector<int> b_buffer(N, 5);
std::vector<int> c_buffer(N, 0);
TEST(ATen, ltInt) {
constexpr int N = 128;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
+ BufHandle c("C", {N}, kInt);
std::vector<int> a_buffer(N, 5);
std::vector<int> b_buffer(N, 5);
std::vector<int> c_buffer(N, 1);
// For this loop bounds inference should yield the following:
// {{b, kStore, 0, 99}, {a, kLoad, 0, 99}}
ExprHandle n(100);
- Placeholder a(BufHandle("a", {n}, kFloat));
+ BufHandle a("a", {n}, kFloat);
Tensor b =
Compute("b", {{n, "i"}}, [&](const VarHandle& i) { return a.load(i); });
LoopNest l({b});
// We should have two entries: one for 'b' and one for 'a'.
ASSERT_EQ(bounds_info.size(), 2);
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{0, 99}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{0, 99}});
ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
// For this loop bounds inference should yield the following:
// {{b, kStore, 0, n-1}, {a, kLoad, 0, n-1}}
VarHandle n("n", kInt);
- Placeholder a(BufHandle("a", {n}, kFloat));
+ BufHandle a("a", {n}, kFloat);
Tensor b =
Compute("b", {{n, "i"}}, [&](const VarHandle& i) { return a.load(i); });
LoopNest l({b});
// We should have two entries: one for 'b' and one for 'a'.
ASSERT_EQ(bounds_info.size(), 2);
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{0, -1}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{0, -1}});
ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
// For this loop bounds inference should yield the following:
// {{b, kStore, 0, 99}, {a, kLoad, 0, 109}}
ExprHandle n(100);
- Placeholder a(BufHandle("a", {n + 10}, kFloat));
+ BufHandle a("a", {n + 10}, kFloat);
Tensor b = Compute("b", {{n, "i"}}, [&](const VarHandle& i) {
return a.load(i) * a.load(i + 10);
});
// We should have two entries: one for 'b' and one for 'a'.
ASSERT_EQ(bounds_info.size(), 2);
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{0, 109}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{0, 109}});
ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
// c[y,x] = a[y,x] * b[y,x]
ExprHandle W(320);
ExprHandle H(200);
- Placeholder a(BufHandle("a", {H, W}, kFloat));
+ BufHandle a("a", {H, W}, kFloat);
Tensor b = Compute(
"b", {{H, "y"}, {W, "x"}}, [&](const VarHandle& y, const VarHandle& x) {
return x * y;
auto bounds_info = inferBounds(loops[0]);
ASSERT_EQ(bounds_info.size(), 3);
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{0, 199}, {0, 319}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{0, 199}, {0, 319}});
ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad);
auto bounds_info = inferBounds(loops[1]);
ASSERT_EQ(bounds_info.size(), 3);
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{-1, -1}, {0, 319}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{-1, -1}, {0, 319}});
ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad);
auto bounds_info = inferBounds(body);
ASSERT_EQ(bounds_info.size(), 3);
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{-1, -1}, {-1, -1}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{-1, -1}, {-1, -1}});
ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad);
// for i_tail in 0..100%16:
// b[i_tail + (100/16)*16] = a[i_tail + (100/16)*16];
ExprHandle n(100);
- Placeholder a(BufHandle("a", {n}, kFloat));
+ BufHandle a("a", {n}, kFloat);
Tensor b =
Compute("b", {{n, "i"}}, [&](const VarHandle& i) { return a.load(i); });
LoopNest l({b});
auto bounds_info = inferBounds(outer);
ASSERT_EQ(bounds_info.size(), 2);
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{0, 95}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{0, 95}});
ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
auto bounds_info = inferBounds(tail);
ASSERT_EQ(bounds_info.size(), 2);
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{96, 99}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{96, 99}});
ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
ExprHandle H(200);
ExprHandle CW(32);
ExprHandle CH(20);
- Placeholder a(BufHandle("a", {H, W}, kFloat));
+ BufHandle a("a", {H, W}, kFloat);
Tensor b = Compute(
"b", {{H, "y"}, {W, "x"}}, [&](const VarHandle& y, const VarHandle& x) {
return x * y;
auto bounds_info = inferBounds(loops[0]);
ASSERT_EQ(bounds_info.size(), 3);
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{100, 119}, {100, 131}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{100, 119}, {100, 131}});
ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad);
auto bounds_info = inferBounds(loops[1]);
ASSERT_EQ(bounds_info.size(), 3);
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{-1, -1}, {100, 131}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{-1, -1}, {100, 131}});
ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad);
auto bounds_info = inferBounds(body);
ASSERT_EQ(bounds_info.size(), 3);
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{-1, -1}, {-1, -1}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{-1, -1}, {-1, -1}});
ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad);
TEST(BoundsInference, Adjacent) {
ExprHandle H(6);
- Placeholder a(BufHandle("a", {20}, kFloat));
+ BufHandle a("a", {20}, kFloat);
Tensor b =
Compute("b", {{H, "x"}}, [&](const VarHandle& x) { return a.load(x); });
Tensor c = Compute(
ASSERT_EQ(bounds_info.size(), 2);
// reads from a[0:5], writes to b[0:5]
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{0, 5}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{0, 5}});
ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
ASSERT_EQ(bounds_info.size(), 2);
// reads from a[0+6:5+6], writes to c[0:5]
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{6, 11}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{6, 11}});
ASSERT_EQ(bounds_info.at(c.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore);
// Should be union of above 2 bounds, but this time the bounds of A can be
// merged.
- ASSERT_EQ(bounds_info.at(a.data()).size(), 1);
- ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(a.data())[0], {{0, 11}});
+ ASSERT_EQ(bounds_info.at(a.node()).size(), 1);
+ ASSERT_EQ(bounds_info.at(a.node())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(a.node())[0], {{0, 11}});
ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
}
TEST(BoundsInference, MultipleTopLoopLoad) {
- Placeholder a(BufHandle("a", {100}, kFloat));
+ BufHandle a("a", {100}, kFloat);
Tensor b =
Compute("b", {{64, "x"}}, [&](const VarHandle& x) { return a.load(x); });
Tensor c = Compute(
// a only read.
{
- auto bounds = bounds_info[a.data()];
+ auto bounds = bounds_info[a.node()];
ASSERT_EQ(bounds.size(), 1);
// One dimension.
auto bound = bounds[0];
constexpr int kPad = 1, kStride = 2, kGroups = C;
constexpr int CperG = C / kGroups;
- te::Placeholder input("input", te::kFloat, {N, C, H, W});
- te::Placeholder weight("weight", te::kFloat, {K, CperG, R, S});
- te::Placeholder bias("bias", te::kFloat, {K});
- te::Tensor output = te::conv2d_depthwise(
- input.handle(), weight.handle(), bias.handle(), kStride, kPad, kGroups);
+ te::BufHandle input("input", {N, C, H, W}, te::kFloat);
+ te::BufHandle weight("weight", {K, CperG, R, S}, te::kFloat);
+ te::BufHandle bias("bias", {K}, te::kFloat);
+ te::Tensor output =
+ te::conv2d_depthwise(input, weight, bias, kStride, kPad, kGroups);
te::LoopNest loop({output});
loop.simplify();
constexpr int kPad = 1, kStride = 2, kGroups = C;
constexpr int CperG = C / kGroups;
- te::Placeholder input("input", te::kFloat, {N, C, H, W});
- te::Placeholder weight("weight", te::kFloat, {K, CperG, R, S});
- te::Tensor output = te::conv2d_depthwise(
- input.handle(), weight.handle(), kStride, kPad, kGroups);
+ te::BufHandle input("input", {N, C, H, W}, te::kFloat);
+ te::BufHandle weight("weight", {K, CperG, R, S}, te::kFloat);
+ te::Tensor output =
+ te::conv2d_depthwise(input, weight, kStride, kPad, kGroups);
te::LoopNest loop({output});
loop.simplify();
te::VarHandle kStride_var("kStride", te::kInt);
te::VarHandle kGroups_var("kGroups", te::kInt);
- te::Placeholder input("input", te::kFloat, {N_var, C_var, H_var, W_var});
- te::Placeholder weight(
- "weight", te::kFloat, {K_var, CperG_var, R_var, S_var});
+ te::BufHandle input("input", {N_var, C_var, H_var, W_var}, te::kFloat);
+ te::BufHandle weight("weight", {K_var, CperG_var, R_var, S_var}, te::kFloat);
te::Tensor output = te::conv2d_depthwise(
- input.handle(),
- weight.handle(),
+ input,
+ weight,
N_var,
C_var,
H_var,
ASSERT_EQ(ref.size(2), OH);
ASSERT_EQ(ref.size(3), OW);
- te::Placeholder inputB(te::BufHandle("input", {N, C, H, W}, te::kFloat));
- te::Placeholder filterB(te::BufHandle("filter", {K, C, R, S}, te::kFloat));
+ te::BufHandle inputB("input", {N, C, H, W}, te::kFloat);
+ te::BufHandle filterB("filter", {K, C, R, S}, te::kFloat);
te::Tensor conv = te::Reduce(
"conv",
}
TEST(CppPrinter, LoadStore) {
- Placeholder a(BufHandle("A", {2, 3}, kInt));
- Placeholder b(BufHandle("B", {3, 4}, kInt));
+ BufHandle a("A", {2, 3}, kInt);
+ BufHandle b("B", {3, 4}, kInt);
auto store = b.store({2, 2}, a.load(1, 1));
STR_CHECK(
store, "B[(0 + 2 * (1 * 4)) + 2 * 1] = A[(0 + 1 * (1 * 3)) + 1 * 1];\n");
TEST(CppPrinter, For) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
+ BufHandle c("C", {N}, kInt);
VarHandle i("i", kInt);
auto f = For::make(i, 0, N, c.store({i}, Add::make(a.load(i), b.load(i))));
const std::string pattern = R"(
}
TEST(CppPrinter, Cond) {
- Placeholder x(BufHandle("X", {1}, kInt));
+ BufHandle x("X", {1}, kInt);
auto cmp = CompareSelect::make(x.load(0), 10, CompareSelectOperation::kLT);
auto cond =
Cond::make(cmp, x.store({0}, x.load(0) + 1), x.store({0}, x.load(0) - 1));
const int block_count = 16;
const int block_size = 128;
Dtype dtype = ToDtype<ctype>();
- Placeholder a_buf("a", dtype, {num_iter, block_count, block_size});
- Placeholder b_buf("b", dtype, {num_iter, block_count, block_size});
+ BufHandle a_buf("a", {num_iter, block_count, block_size}, dtype);
+ BufHandle b_buf("b", {num_iter, block_count, block_size}, dtype);
Tensor c = Compute(
"c",
{
const int block_count = 16;
const int block_size = 128;
Dtype dtype = ToDtype<float>();
- Placeholder a_buf("a", dtype, {num_iter, block_count, block_size});
+ BufHandle a_buf("a", {num_iter, block_count, block_size}, dtype);
Tensor c = Compute(
"c",
{
}
static void testCudaTestVectorAdd02_impl(int N, int block_size) {
- Placeholder a_buf("a", kFloat, {N});
- Placeholder b_buf("b", kFloat, {N});
+ BufHandle a_buf("a", {N}, kFloat);
+ BufHandle b_buf("b", {N}, kFloat);
Tensor c = Compute(
"c",
{
TEST(Cuda, HalfCast_CUDA) {
auto half = ToDtype<at::Half>();
- Placeholder a("a", half, {4});
+ BufHandle a("a", {4}, half);
Tensor b = Compute("b", {{4, "n"}}, [&](const VarHandle& i) {
return Cast::make(kFloat, a.load(i));
});
auto testWithSize = [](int32_t M, int32_t N) {
VarHandle m("m", kInt);
VarHandle n("n", kInt);
- Placeholder a(BufHandle("a", {m, n}, kFloat));
- Placeholder b(BufHandle("b", {m, n}, kFloat));
+ BufHandle a("a", {m, n}, kFloat);
+ BufHandle b("b", {m, n}, kFloat);
Tensor c = Compute(
"c", {{m, "m"}, {n, "n"}}, [&](const VarHandle& i, const VarHandle& j) {
return a.load(i, j) + b.load(i, j);
TEST(Cuda, DynamicShapeSplit_CUDA) {
constexpr int N = 4096;
VarHandle n("n", kInt);
- Placeholder a(BufHandle("a", {n}, kFloat));
+ BufHandle a("a", {n}, kFloat);
Tensor b = Compute(
"b", {{n, "n"}}, [&](const VarHandle& i) { return a.load(i) * 2.0f; });
LoopNest l({b});
TEST(Cuda, OneBlockOneThreadGlobalReduce1_CUDA) {
const static int N = 1024;
- Placeholder data_buf("data", kFloat, {N});
- Placeholder output_buf("output", kFloat, {1});
+ BufHandle data_buf("data", {N}, kFloat);
+ BufHandle output_buf("output", {1}, kFloat);
// The test adds the following code for trivial reduction:
// for (int bidx = 0; bidx < 1; bidx++) { // blockIdx.x
StorePtr init_store = output_buf.store({0}, 0.f);
VarHandle i1("i1", kInt);
- ExprHandle load_data = Load::make(BufHandle(data_buf.data()), {i1});
- ExprHandle load_output = Load::make(BufHandle(output_buf.data()), {0});
+ ExprHandle load_data = Load::make(data_buf, {i1});
+ ExprHandle load_output = Load::make(output_buf, {0});
ExprHandle add_value = load_output + load_data;
StorePtr store_output = output_buf.store({0}, add_value);
ForPtr for_output = For::make(i1, 0, N, store_output);
// b[0] = b[0] + a[t] // implied atomic
// clang-format on
- Placeholder a_buf("a", kFloat, {N});
- Placeholder b_buf("b", kFloat, {1});
+ BufHandle a_buf("a", {N}, kFloat);
+ BufHandle b_buf("b", {1}, kFloat);
StorePtr init_store = b_buf.store({0}, 0.f);
VarHandle t("t", kInt);
// for t in 0..1024: // thread-idx
// b[0] = b[0] + a[t] // implied atomic
- ExprHandle load_a = Load::make(BufHandle(a_buf.data()), {t});
- ExprHandle load_b = Load::make(BufHandle(b_buf.data()), {0});
+ ExprHandle load_a = Load::make(a_buf, {t});
+ ExprHandle load_b = Load::make(b_buf, {0});
ExprHandle add_value = load_b + load_a;
StorePtr store_b = b_buf.store({0}, add_value);
ForPtr for_b = For::make(t, 0, N, store_b, thread_idx_options);
// covered by its own thread-idx
const static int N = 1024;
- Placeholder a_buf("a", kFloat, {2});
- Placeholder b_buf("b", kFloat, {N});
+ BufHandle a_buf("a", {2}, kFloat);
+ BufHandle b_buf("b", {N}, kFloat);
VarHandle k("k", kInt);
VarHandle l("l", kInt);
// for n in 0..2:
// a[0] = a[0] + n
StorePtr store_a0_0 = a_buf.store({0}, 0.f);
- ExprHandle load_a0 = Load::make(BufHandle(a_buf.data()), {0});
+ ExprHandle load_a0 = Load::make(a_buf, {0});
ExprHandle v1 = load_a0 + n;
StorePtr store_a0_v1 = a_buf.store({0}, v1);
ForPtr loop_a_0 = For::make(n, 0, 2, store_a0_v1);
LoopOptions block_idx_opt;
block_idx_opt.set_gpu_block_index(0);
- Placeholder a("a", kFloat, {1, M, N});
- Placeholder b("b", kFloat, {1});
+ BufHandle a("a", {1, M, N}, kFloat);
+ BufHandle b("b", {1}, kFloat);
VarHandle k("k", kInt);
VarHandle m("m", kInt);
VarHandle n("n", kInt);
// for n in 0..64: // thread_idx
// c(n) = c(n) + a(k, m, n)
ExprHandle load_cn = Load::make(kFloat, c, {n});
- ExprHandle a_kmn =
- Load::make(BufHandle(a.data()), {k * (M * N) + m * N + n});
+ ExprHandle a_kmn = Load::make(a, {k * (M * N) + m * N + n});
ExprHandle v_add = load_cn + a_kmn;
StorePtr store_cn_v = Store::make(c, {n}, v_add);
ForPtr loop_n2 = For::make(n, 0, N, store_cn_v, thread_idx_opt);
LoopOptions block_idx_opt;
block_idx_opt.set_gpu_block_index(0);
- Placeholder a("a", kFloat, {1, M, N});
- Placeholder b("b", kFloat, {1});
+ BufHandle a("a", {1, M, N}, kFloat);
+ BufHandle b("b", {1}, kFloat);
VarHandle k("k", kInt);
VarHandle m("m", kInt);
VarHandle n("n", kInt);
TEST(Cuda, HalfSupport_CUDA) {
auto half = ToDtype<at::Half>();
- Placeholder a("a", half, {4});
+ BufHandle a("a", {4}, half);
Tensor b = Compute("b", {{4, "n"}}, [&](const VarHandle& i) {
return Cast::make(half, ExprHandle(2.0f) * a.load(i));
});
TEST(Cuda, HalfPropagation_CUDA) {
auto half = ToDtype<at::Half>();
- Placeholder a("a", half, {4});
+ BufHandle a("a", {4}, half);
Tensor relu = Compute("relu", {{4, "n"}}, [&](const VarHandle& i) {
return Max::make(a.load(i), ExprHandle(alloc<HalfImm>(0)), true);
});
}
TEST(Cuda, UnusedHalfArgument_CUDA) {
- Placeholder a("a", kFloat, {4});
+ BufHandle a("a", {4}, kFloat);
auto half = ToDtype<at::Half>();
- Placeholder b("b", half, {4});
+ BufHandle b("b", {4}, half);
Tensor relu = Compute("relu", {{4, "n"}}, [&](const VarHandle& i) {
return Max::make(a.load(i), ExprHandle(alloc<FloatImm>(0)), true);
});
}
TEST(Cuda, PrioritizeDependents_CUDA) {
- Placeholder a("a", kFloat, {10});
- Placeholder b("b", kFloat, {12});
- Placeholder c("c", kFloat, {12});
+ BufHandle a("a", {10}, kFloat);
+ BufHandle b("b", {12}, kFloat);
+ BufHandle c("c", {12}, kFloat);
LoopOptions block_idx_opt;
block_idx_opt.set_gpu_block_index(0);
* c[i] = (i < 10 ? a[i] + b[i] : b[i]);
* }
*/
- ExprHandle load_a = Load::make(BufHandle(a.data()), {i});
- ExprHandle load_b = Load::make(BufHandle(b.data()), {i});
+ ExprHandle load_a = a.load({i});
+ ExprHandle load_b = b.load({i});
ExprHandle cmp = CompareSelect::make(i, 10, CompareSelectOperation::kLT);
ExprHandle ite = IfThenElse::make(cmp, Add::make(load_a, load_b), load_b);
TEST(Cuda, MaskBlockDim_CUDA) {
int A_SIZE = 100;
int B_SIZE = 50;
- Placeholder a_buf("a", kFloat, {A_SIZE});
- Placeholder b_buf("b", kFloat, {B_SIZE});
+ BufHandle a_buf("a", {A_SIZE}, kFloat);
+ BufHandle b_buf("b", {B_SIZE}, kFloat);
Tensor c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) {
return a_buf.load(i) + 10;
});
TEST(Cuda, MaskThreadDim_CUDA) {
int A_SIZE = 50;
int B_SIZE = 100;
- Placeholder a_buf("a", kFloat, {A_SIZE});
- Placeholder b_buf("b", kFloat, {B_SIZE});
+ BufHandle a_buf("a", {A_SIZE}, kFloat);
+ BufHandle b_buf("b", {B_SIZE}, kFloat);
Tensor c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) {
return a_buf.load(i) + 10;
});
TEST(Cuda, MaskMultiBlockDim_CUDA) {
int A_SIZE = 100;
int B_SIZE = 50;
- Placeholder a_buf("a", kFloat, {A_SIZE});
- Placeholder b_buf("b", kFloat, {B_SIZE});
+ BufHandle a_buf("a", {A_SIZE}, kFloat);
+ BufHandle b_buf("b", {B_SIZE}, kFloat);
Tensor c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) {
return a_buf.load(i) + 10;
});
TEST(Cuda, MaskBlockAndThreadDim_CUDA) {
int A_SIZE = 100;
int B_SIZE = 50;
- Placeholder a_buf("a", kFloat, {A_SIZE});
- Placeholder b_buf("b", kFloat, {B_SIZE});
+ BufHandle a_buf("a", {A_SIZE}, kFloat);
+ BufHandle b_buf("b", {B_SIZE}, kFloat);
Tensor c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) {
return a_buf.load(i) + 10;
});
int OUTER_SIZE = 10;
int A_SIZE = 100;
int B_SIZE = 50;
- Placeholder a_buf("a", kFloat, {OUTER_SIZE, A_SIZE});
- Placeholder b_buf("b", kFloat, {OUTER_SIZE, B_SIZE});
+ BufHandle a_buf("a", {OUTER_SIZE, A_SIZE}, kFloat);
+ BufHandle b_buf("b", {OUTER_SIZE, B_SIZE}, kFloat);
Tensor c = Compute(
"C",
{{OUTER_SIZE, "i"}, {A_SIZE, "j"}},
VarHandle OUTER_SIZE("OUTER_SIZE", kInt);
VarHandle A_SIZE("A_SIZE", kInt);
VarHandle B_SIZE("B_SIZE", kInt);
- Placeholder a_buf("a", kFloat, {OUTER_SIZE, A_SIZE});
- Placeholder b_buf("b", kFloat, {OUTER_SIZE, B_SIZE});
+ BufHandle a_buf("a", {OUTER_SIZE, A_SIZE}, kFloat);
+ BufHandle b_buf("b", {OUTER_SIZE, B_SIZE}, kFloat);
Tensor c = Compute(
"C",
{{OUTER_SIZE, "i"}, {A_SIZE, "j"}},
int OUTER_SIZE = 10;
int A_SIZE = 100;
int B_SIZE = 50;
- Placeholder a_buf("a", kFloat, {OUTER_SIZE, A_SIZE});
- Placeholder b_buf("b", kFloat, {OUTER_SIZE, B_SIZE});
- Placeholder c_buf("c", kFloat, {OUTER_SIZE, A_SIZE});
- Placeholder d_buf("d", kFloat, {OUTER_SIZE, B_SIZE});
+ BufHandle a_buf("a", {OUTER_SIZE, A_SIZE}, kFloat);
+ BufHandle b_buf("b", {OUTER_SIZE, B_SIZE}, kFloat);
+ BufHandle c_buf("c", {OUTER_SIZE, A_SIZE}, kFloat);
+ BufHandle d_buf("d", {OUTER_SIZE, B_SIZE}, kFloat);
// Can't build this using Compute and transforms yet.
LoopOptions blockBound;
int OUTER_SIZE = 10;
int A_SIZE = 100;
int B_SIZE = 50;
- Placeholder a_buf("a", kFloat, {OUTER_SIZE, A_SIZE});
- Placeholder b_buf("b", kFloat, {OUTER_SIZE, B_SIZE});
- Placeholder c_buf("c", kFloat, {OUTER_SIZE, A_SIZE});
- Placeholder d_buf("d", kFloat, {OUTER_SIZE, B_SIZE});
+ BufHandle a_buf("a", {OUTER_SIZE, A_SIZE}, kFloat);
+ BufHandle b_buf("b", {OUTER_SIZE, B_SIZE}, kFloat);
+ BufHandle c_buf("c", {OUTER_SIZE, A_SIZE}, kFloat);
+ BufHandle d_buf("d", {OUTER_SIZE, B_SIZE}, kFloat);
// Can't build this using Compute and transforms yet.
LoopOptions blockBound;
int OUTER_SIZE = 10;
int A_SIZE = 30;
int B_SIZE = 15;
- Placeholder a_buf("a", kFloat, {OUTER_SIZE, A_SIZE});
- Placeholder b_buf("b", kFloat, {OUTER_SIZE, B_SIZE});
+ BufHandle a_buf("a", {OUTER_SIZE, A_SIZE}, kFloat);
+ BufHandle b_buf("b", {OUTER_SIZE, B_SIZE}, kFloat);
Tensor c = Compute(
"C",
{{OUTER_SIZE, "i"}, {A_SIZE, "j"}},
int OUTER_B_SIZE = 5;
int A_SIZE = 30;
int B_SIZE = 15;
- Placeholder a_buf("a", kFloat, {OUTER_A_SIZE, A_SIZE});
- Placeholder b_buf("b", kFloat, {OUTER_B_SIZE, B_SIZE});
+ BufHandle a_buf("a", {OUTER_A_SIZE, A_SIZE}, kFloat);
+ BufHandle b_buf("b", {OUTER_B_SIZE, B_SIZE}, kFloat);
Tensor c = Compute(
"C",
{{OUTER_A_SIZE, "i"}, {A_SIZE, "j"}},
}
TEST(Expr, LetStmtTest01) {
- Placeholder a_buf("a", kFloat, {1});
- Placeholder b_buf("b", kFloat, {1});
+ BufHandle a_buf("a", {1}, kFloat);
+ BufHandle b_buf("b", {1}, kFloat);
ExprHandle load_a = a_buf.load(0);
VarHandle var = VarHandle("v", kFloat);
const int kVectorCount = 128;
const int kTotalSize = kVectorSize * kVectorCount;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder c_buf(BufHandle("C", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {kTotalSize}, kFloat);
+ BufHandle b_buf("B", {kTotalSize}, kFloat);
+ BufHandle c_buf("C", {kTotalSize}, kFloat);
/*
Build the following:
TEST(Expr, CompareSelectEQ) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
+ BufHandle c("C", {N}, kInt);
std::vector<int> a_buffer(N, 1);
std::vector<int> b_buffer(N, 1);
std::vector<int> c_buffer(N, 0);
// different from the output dtype and verifies that it works correctly:
// result = ((int)lhs == (int)rhs) ? (float)retval1 : (float)retval2
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
- Placeholder c(BufHandle("C", {N}, kFloat));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
+ BufHandle c("C", {N}, kFloat);
std::vector<int> a_buffer(N, 1);
std::vector<int> b_buffer(N, 1);
std::vector<float> c_buffer(N, 0.0f);
TEST(Expr, IntrinsicsDtypes) {
constexpr int N = 256;
- Placeholder a(BufHandle("A", {N}, kDouble));
- Placeholder b(BufHandle("B", {N}, kDouble));
+ BufHandle a("A", {N}, kDouble);
+ BufHandle b("B", {N}, kDouble);
std::vector<double> a_buffer(N, -10.0);
std::vector<double> b_buffer(N, 0.0);
std::vector<double> b_ref(N, 10.0);
TEST(Expr, DynamicShapeAdd) {
auto testWithSize = [](int32_t size) {
VarHandle n("n", kInt);
- Placeholder a(BufHandle("a", {n}, kFloat));
- Placeholder b(BufHandle("b", {n}, kFloat));
- Placeholder c(BufHandle("c", {n}, kFloat));
+ BufHandle a("a", {n}, kFloat);
+ BufHandle b("b", {n}, kFloat);
+ BufHandle c("c", {n}, kFloat);
VarHandle i("i", kInt);
StmtPtr s = For::make(i, 0, n, c.store({i}, a.load(i) + b.load(i)));
std::vector<float> aData(size, 1.0f);
void testCond01() {
const int N = 16;
PaddedBuffer<float> a_v(N);
- Placeholder a_buf("a", kFloat, {N});
+ BufHandle a_buf("a", {N}, kFloat);
VarHandle index = VarHandle("index", kInt);
StmtPtr assign_x2 = a_buf.store({index}, cast<float>(index) * 2);
StmtPtr assign_x3 = a_buf.store({index}, cast<float>(index) * 3);
void testStmtClone() {
const int N = 16;
- Placeholder a_buf("a", kInt, {N});
+ BufHandle a_buf("a", {N}, kInt);
VarHandle index = VarHandle("index", kInt);
StmtPtr body = a_buf.store({index}, 5);
StmtPtr loop = For::make(index, 0, N, body);
using namespace torch::jit::tensorexpr;
TEST(ExternalCall, Conv2d_float) {
- Placeholder Input("Input", kFloat, {1, 3, 224, 224});
- Placeholder Weight("Weight", kFloat, {16, 3, 3, 3});
- Placeholder Bias("Bias", kFloat, {16});
+ BufHandle Input("Input", {1, 3, 224, 224}, kFloat);
+ BufHandle Weight("Weight", {16, 3, 3, 3}, kFloat);
+ BufHandle Bias("Bias", {16}, kFloat);
BufHandle ResultBuf("Result", {1, 16, 112, 112}, kFloat);
int64_t stride = 2;
int64_t pad = 1;
ExternalCall::make(
ResultBuf,
"nnc_aten_conv2d",
- {BufHandle(Input.data()),
- BufHandle(Weight.data()),
- BufHandle(Bias.data())},
+ {Input, Weight, Bias},
{stride, stride, pad, pad, dilation, dilation, groups}));
LoopNest l({Result});
l.prepareForCodegen();
TEST(ExternalCall, Conv2d_int) {
// A similar test, but now using kInt tensors
- Placeholder Input("Input", kInt, {1, 3, 224, 224});
- Placeholder Weight("Weight", kInt, {16, 3, 3, 3});
- Placeholder Bias("Bias", kInt, {16});
+ BufHandle Input("Input", {1, 3, 224, 224}, kInt);
+ BufHandle Weight("Weight", {16, 3, 3, 3}, kInt);
+ BufHandle Bias("Bias", {16}, kInt);
BufHandle ResultBuf("Result", {1, 16, 112, 112}, kInt);
int64_t stride = 2;
int64_t pad = 1;
ExternalCall::make(
ResultBuf,
"nnc_aten_conv2d",
- {BufHandle(Input.data()),
- BufHandle(Weight.data()),
- BufHandle(Bias.data())},
+ {Input, Weight, Bias},
{stride, stride, pad, pad, dilation, dilation, groups}));
LoopNest l({Result});
l.prepareForCodegen();
}
TEST(ExternalCall, Conv2d_nobias_noargs) {
- Placeholder Input("Input", kFloat, {1, 16, 112, 112});
- Placeholder Weight("Weight", kFloat, {16, 16, 1, 1});
+ BufHandle Input("Input", {1, 16, 112, 112}, kFloat);
+ BufHandle Weight("Weight", {16, 16, 1, 1}, kFloat);
BufHandle ResultBuf("Result", {1, 16, 112, 112}, kFloat);
Tensor Result = Tensor(
ResultBuf.node(),
- ExternalCall::make(
- ResultBuf,
- "nnc_aten_conv2d",
- {BufHandle(Input.data()), BufHandle(Weight.data())},
- {}));
+ ExternalCall::make(ResultBuf, "nnc_aten_conv2d", {Input, Weight}, {}));
LoopNest l({Result});
l.prepareForCodegen();
l.simplify();
}
TEST(ExternalCall, Addmm_float) {
- Placeholder Input("Input", kFloat, {100, 300});
- Placeholder Mat1("Mat1", kFloat, {100, 200});
- Placeholder Mat2("Mat2", kFloat, {200, 300});
+ BufHandle Input("Input", {100, 300}, kFloat);
+ BufHandle Mat1("Mat1", {100, 200}, kFloat);
+ BufHandle Mat2("Mat2", {200, 300}, kFloat);
BufHandle ResultBuf("Result", {100, 300}, kFloat);
int64_t beta = 2;
int64_t alpha = 2;
Tensor Result = Tensor(
ResultBuf.node(),
ExternalCall::make(
- ResultBuf,
- "nnc_aten_addmm",
- {BufHandle(Input.data()),
- BufHandle(Mat1.data()),
- BufHandle(Mat2.data())},
- {beta, alpha}));
+ ResultBuf, "nnc_aten_addmm", {Input, Mat1, Mat2}, {beta, alpha}));
LoopNest l({Result});
l.prepareForCodegen();
l.simplify();
TEST(ExternalCall, Prepacked_Linear_float) {
using namespace at::native::xnnpack;
- Placeholder Input("Input", kFloat, {100, 200});
+ BufHandle Input("Input", {100, 200}, kFloat);
BufHandle ResultBuf("Result", {100, 300}, kFloat);
// Calculate reference result using at::linear.
auto prepacked = linear_clamp_prepack_op.call(
weight, bias, c10::optional<at::Scalar>(), c10::optional<at::Scalar>());
- Placeholder DummyPrepacked("DummyPrepacked", kFloat, {1});
+ BufHandle DummyPrepacked("DummyPrepacked", {1}, kFloat);
Tensor Result = Tensor(
ResultBuf.node(),
ExternalCall::make(
ResultBuf,
"nnc_prepacked_linear_clamp_run",
- {BufHandle(Input.data()), BufHandle(DummyPrepacked.data())},
+ {Input, DummyPrepacked},
{}));
LoopNest l({Result});
l.prepareForCodegen();
TEST(ExternalCall, Prepacked_Conv2d_float) {
using namespace at::native::xnnpack;
- Placeholder Input("Input", kFloat, {1, 3, 224, 224});
+ BufHandle Input("Input", {1, 3, 224, 224}, kFloat);
BufHandle ResultBuf("Result", {1, 16, 112, 112}, kFloat);
int64_t stride = 2;
int64_t pad = 1;
c10::optional<at::Scalar>(),
c10::optional<at::Scalar>());
- Placeholder DummyPrepacked("DummyPrepacked", kFloat, {1});
+ BufHandle DummyPrepacked("DummyPrepacked", {1}, kFloat);
Tensor Result = Tensor(
ResultBuf.node(),
ExternalCall::make(
ResultBuf,
"nnc_prepacked_conv2d_clamp_run",
- {BufHandle(Input.data()), BufHandle(DummyPrepacked.data())},
+ {Input, DummyPrepacked},
{}));
LoopNest l({Result});
l.prepareForCodegen();
auto intV = std::vector<int>(v.begin(), v.end());
return std::vector<ExprHandle>(intV.begin(), intV.end());
};
- Placeholder A("A", kFloat, toExprHandleVec(aShape));
- Placeholder B("", kFloat, toExprHandleVec(bShape));
+ BufHandle A("A", toExprHandleVec(aShape), kFloat);
+ BufHandle B("B", toExprHandleVec(bShape), kFloat);
BufHandle ResultBuf("Result", toExprHandleVec(resShape), kFloat);
Tensor Result = Tensor(
ResultBuf.node(),
- ExternalCall::make(
- ResultBuf,
- externCallName,
- {BufHandle(A.data()), BufHandle(B.data())},
- {}));
+ ExternalCall::make(ResultBuf, externCallName, {A, B}, {}));
LoopNest l({Result});
l.prepareForCodegen();
l.simplify();
std::vector<ExprHandle> externCallArgs;
std::tie(aShape, resShape, torchFunc, externCallName, externCallArgs) =
curTest;
- Placeholder A("A", kFloat, toExprHandleVec(aShape));
+ BufHandle A("A", toExprHandleVec(aShape), kFloat);
BufHandle ResultBuf("Result", toExprHandleVec(resShape), kFloat);
Tensor Result = Tensor(
ResultBuf.node(),
- ExternalCall::make(
- ResultBuf, externCallName, {BufHandle(A.data())}, externCallArgs));
+ ExternalCall::make(ResultBuf, externCallName, {A}, externCallArgs));
LoopNest l({Result});
l.prepareForCodegen();
l.simplify();
TEST(LLVM, fastLogFloat) {
const int kTotalSize = 128 * 128;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b_buf(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b_buf("B", {ExprHandle(kTotalSize)}, kFloat);
VarHandle index = VarHandle("index", kInt);
ExprHandle load_a = a_buf.load(index);
}
TEST(LLVM, LetTest01) {
- Placeholder a(BufHandle("A", {1}, kFloat));
+ BufHandle a("A", {1}, kFloat);
std::vector<float> v = {1, 0};
std::vector<void*> args({v.data()});
VarHandle x("x", kFloat);
}
TEST(LLVM, LetTest02) {
- Placeholder a(BufHandle("A", {1}, kFloat));
+ BufHandle a("A", {1}, kFloat);
std::vector<float> v = {1, 0};
std::vector<void*> args({v.data()});
VarHandle x("x", kFloat);
}
TEST(LLVM, LetTestMultitype) {
- Placeholder a(BufHandle("A", {1}, kDouble));
+ BufHandle a("A", {1}, kDouble);
std::vector<double> v = {1, 0};
std::vector<void*> args({v.data()});
VarHandle x("x", kByte);
}
TEST(LLVM, BufferTest) {
- Placeholder a(BufHandle("A", {32}, kFloat));
+ BufHandle a("A", {32}, kFloat);
std::vector<int32_t> v(5);
std::vector<void*> args({v.data()});
auto rv = IntImm::make(0);
}
TEST(LLVM, BlockTest) {
- Placeholder a(BufHandle("A", {32}, kInt));
+ BufHandle a("A", {32}, kInt);
std::vector<int32_t> v = {1, 2};
std::vector<void*> args({v.data()});
}
TEST(LLVM, LoadStoreTest) {
- Placeholder a(BufHandle("A", {1}, kInt));
- Placeholder b(BufHandle("B", {1}, kInt));
+ BufHandle a("A", {1}, kInt);
+ BufHandle b("B", {1}, kInt);
std::vector<int32_t> a_buffer = {42};
std::vector<int32_t> b_buffer = {-11};
}
TEST(LLVM, IfThenElseTest) {
- Placeholder a(BufHandle("A", {1}, kInt));
- Placeholder b(BufHandle("B", {1}, kInt));
- Placeholder c(BufHandle("C", {1}, kInt));
+ BufHandle a("A", {1}, kInt);
+ BufHandle b("B", {1}, kInt);
+ BufHandle c("C", {1}, kInt);
std::vector<int32_t> a_buffer = {42};
std::vector<int32_t> b_buffer = {-11};
std::vector<int32_t> c_buffer = {1};
// if (x < 10) x = x + 1
TEST(LLVM, CondNoFalseBlockTest) {
- Placeholder x(BufHandle("X", {1}, kInt));
+ BufHandle x("X", {1}, kInt);
auto cmp = CompareSelect::make(x.load(0), 10, CompareSelectOperation::kLT);
auto cond = Cond::make(cmp, x.store({0}, x.load(0) + 1), nullptr);
// x = x - 1;
// }
TEST(LLVM, CondTest) {
- Placeholder x(BufHandle("X", {1}, kInt));
+ BufHandle x("X", {1}, kInt);
auto cmp = CompareSelect::make(x.load(0), 10, CompareSelectOperation::kLT);
auto cond =
Cond::make(cmp, x.store({0}, x.load(0) + 1), x.store({0}, x.load(0) - 1));
// }
// }
TEST(LLVM, CondNestedTest) {
- Placeholder x(BufHandle("X", {1}, kInt));
+ BufHandle x("X", {1}, kInt);
auto true_cmp =
CompareSelect::make(x.load(0), 5, CompareSelectOperation::kGT);
auto true_cond = Cond::make(
}
TEST(LLVM, VecLoadStoreTest) {
- Placeholder a(BufHandle("A", {1}, kInt));
- Placeholder b(BufHandle("B", {1}, kInt));
+ BufHandle a("A", {1}, kInt);
+ BufHandle b("B", {1}, kInt);
std::vector<int32_t> a_buffer = {1, 1, 1, 1};
std::vector<int32_t> b_buffer = {2, 2, 2, 2};
#define FLOAT_INTRINSICS_TEST(Name, Lanes) \
TEST(LLVM, VecFloat_##Name##Lane##Lanes##Test) { \
- Placeholder a(BufHandle("A", {1}, kFloat)); \
- Placeholder b(BufHandle("B", {1}, kFloat)); \
+ BufHandle a("A", {1}, kFloat); \
+ BufHandle b("B", {1}, kFloat); \
float val = 0.5f; \
std::vector<float> a_buffer(Lanes, val); \
std::vector<float> b_buffer(Lanes, val); \
#define DOUBLE_INTRINSICS_TEST(Name, Lanes) \
TEST(LLVM, VecDouble_##Name##Lane##Lanes##Test) { \
- Placeholder a(BufHandle("A", {1}, kDouble)); \
- Placeholder b(BufHandle("B", {1}, kDouble)); \
+ BufHandle a("A", {1}, kDouble); \
+ BufHandle b("B", {1}, kDouble); \
float val = 0.5f; \
std::vector<double> a_buffer(Lanes, val); \
std::vector<double> b_buffer(Lanes, val); \
#undef DOUBLE_INTRINSICS_TEST
TEST(LLVM, VectorizerLoadStoreTest) {
- Placeholder a(BufHandle("A", {1}, kInt));
+ BufHandle a("A", {1}, kInt);
Tensor c =
Compute("c", {{4, "i"}}, [&](const VarHandle& i) { return a.load(i); });
- Placeholder c_buf(BufHandle(c.buf()));
+ BufHandle c_buf(c.buf());
LoopNest l({c});
StmtPtr s = l.root_stmt();
ASSERT_TRUE(LoopNest::vectorize(to<For>(to<Block>(s)->front())));
}
TEST(LLVM, VectorizeBitCast) {
- Placeholder a(BufHandle("A", {128}, kInt));
+ BufHandle a("A", {128}, kInt);
Tensor c = Compute("c", {{128, "i"}}, [&](const VarHandle& i) {
return bitcast<float>(a.load(i));
});
- Placeholder c_buf(BufHandle(c.buf()));
+ BufHandle c_buf(c.buf());
LoopNest l({c});
StmtPtr s = l.root_stmt();
ASSERT_TRUE(LoopNest::vectorize(to<For>(to<Block>(s)->front())));
TEST(LLVM, MemcpyTest) {
constexpr int N = 32;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
std::vector<int32_t> a_buffer(N, 42);
std::vector<int32_t> b_buffer(N, 0);
TEST(LLVM, BzeroTest) {
constexpr int N = 32;
- Placeholder b(BufHandle("B", {N}, kInt));
+ BufHandle b("B", {N}, kInt);
std::vector<int32_t> b_buffer(N, 11);
VarHandle i("i", kInt);
TEST(LLVM, ElemwiseAdd) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
+ BufHandle c("C", {N}, kInt);
std::vector<int32_t> a_buffer(N, 41);
std::vector<int32_t> b_buffer(N, 1);
std::vector<int32_t> c_buffer(N, 1);
TEST(LLVM, ElemwiseAddFloat) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kFloat));
- Placeholder b(BufHandle("B", {N}, kFloat));
- Placeholder c(BufHandle("C", {N}, kFloat));
+ BufHandle a("A", {N}, kFloat);
+ BufHandle b("B", {N}, kFloat);
+ BufHandle c("C", {N}, kFloat);
std::vector<float> a_buffer(N, 41);
std::vector<float> b_buffer(N, 1);
std::vector<float> c_buffer(N, 1);
TEST(LLVM, ElemwiseLog10Float) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kFloat));
- Placeholder b(BufHandle("B", {N}, kFloat));
+ BufHandle a("A", {N}, kFloat);
+ BufHandle b("B", {N}, kFloat);
std::vector<float> a_buffer(N, 10.0f);
std::vector<float> b_buffer(N, 2.0f);
TEST(LLVM, ElemwiseLog1pFloat) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kFloat));
- Placeholder b(BufHandle("B", {N}, kFloat));
+ BufHandle a("A", {N}, kFloat);
+ BufHandle b("B", {N}, kFloat);
std::vector<float> a_buffer(N, expf(3.0f) - 1);
std::vector<float> b_buffer(N, 42.0f);
TEST(LLVM, ElemwiseMaxInt) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
+ BufHandle c("C", {N}, kInt);
std::vector<int> a_buffer(N, 41);
std::vector<int> b_buffer(N, 1);
std::vector<int> c_buffer(N, 1);
TEST(LLVM, ElemwiseMinInt) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
+ BufHandle c("C", {N}, kInt);
std::vector<int> a_buffer(N, 41);
std::vector<int> b_buffer(N, 1);
std::vector<int> c_buffer(N, 1);
TEST(LLVM, ElemwiseMaxFloat) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kFloat));
- Placeholder b(BufHandle("B", {N}, kFloat));
- Placeholder c(BufHandle("C", {N}, kFloat));
+ BufHandle a("A", {N}, kFloat);
+ BufHandle b("B", {N}, kFloat);
+ BufHandle c("C", {N}, kFloat);
std::vector<float> a_buffer(N, 41);
std::vector<float> b_buffer(N, 1);
std::vector<float> c_buffer(N, 1);
TEST(LLVM, ElemwiseMaxNaNFloat) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kFloat));
- Placeholder b(BufHandle("B", {N}, kFloat));
- Placeholder c(BufHandle("C", {N}, kFloat));
+ BufHandle a("A", {N}, kFloat);
+ BufHandle b("B", {N}, kFloat);
+ BufHandle c("C", {N}, kFloat);
std::vector<float> a_buffer(N, NAN);
std::vector<float> b_buffer(N, 1);
std::vector<float> c_buffer(N, 1);
TEST(LLVM, ElemwiseMinFloat) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kFloat));
- Placeholder b(BufHandle("B", {N}, kFloat));
- Placeholder c(BufHandle("C", {N}, kFloat));
+ BufHandle a("A", {N}, kFloat);
+ BufHandle b("B", {N}, kFloat);
+ BufHandle c("C", {N}, kFloat);
std::vector<float> a_buffer(N, 41);
std::vector<float> b_buffer(N, 1);
std::vector<float> c_buffer(N, 1);
TEST(LLVM, ElemwiseMinNaNFloat) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kFloat));
- Placeholder b(BufHandle("B", {N}, kFloat));
- Placeholder c(BufHandle("C", {N}, kFloat));
+ BufHandle a("A", {N}, kFloat);
+ BufHandle b("B", {N}, kFloat);
+ BufHandle c("C", {N}, kFloat);
std::vector<float> a_buffer(N, NAN);
std::vector<float> b_buffer(N, 1);
std::vector<float> c_buffer(N, 1);
TEST(LLVM, ElemwiseMod) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
+ BufHandle c("C", {N}, kInt);
std::vector<int32_t> a_buffer(N, 41);
std::vector<int32_t> b_buffer(N, 23);
std::vector<int32_t> c_buffer(N, 18);
TEST(LLVM, CompareSelectIntEQ) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kInt));
- Placeholder b(BufHandle("B", {N}, kInt));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kInt);
+ BufHandle b("B", {N}, kInt);
+ BufHandle c("C", {N}, kInt);
std::vector<int> a_buffer(N, 1);
std::vector<int> b_buffer(N, 1);
std::vector<int> c_buffer(N, 0);
TEST(LLVM, CompareSelectFloatEQ) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kFloat));
- Placeholder b(BufHandle("B", {N}, kFloat));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kFloat);
+ BufHandle b("B", {N}, kFloat);
+ BufHandle c("C", {N}, kInt);
std::vector<float> a_buffer(N, 1.0f);
std::vector<float> b_buffer(N, 1.0f);
std::vector<int> c_buffer(N, 0);
TEST(LLVM, CompareSelectByteGT) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kByte));
- Placeholder b(BufHandle("B", {N}, kByte));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kByte);
+ BufHandle b("B", {N}, kByte);
+ BufHandle c("C", {N}, kInt);
std::vector<uint8_t> a_buffer(N, 0);
std::vector<uint8_t> b_buffer(N, 0);
std::vector<int> c_buffer(N, 0);
TEST(LLVM, CompareSelectByteGE) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kByte));
- Placeholder b(BufHandle("B", {N}, kByte));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kByte);
+ BufHandle b("B", {N}, kByte);
+ BufHandle c("C", {N}, kInt);
std::vector<uint8_t> a_buffer(N, 0);
std::vector<uint8_t> b_buffer(N, 0);
std::vector<int> c_buffer(N, 0);
TEST(LLVM, CompareSelectByteLT) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kByte));
- Placeholder b(BufHandle("B", {N}, kByte));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kByte);
+ BufHandle b("B", {N}, kByte);
+ BufHandle c("C", {N}, kInt);
std::vector<uint8_t> a_buffer(N, 0);
std::vector<uint8_t> b_buffer(N, 128);
std::vector<int> c_buffer(N, 0);
TEST(LLVM, CompareSelectByteLE) {
constexpr int N = 1024;
- Placeholder a(BufHandle("A", {N}, kByte));
- Placeholder b(BufHandle("B", {N}, kByte));
- Placeholder c(BufHandle("C", {N}, kInt));
+ BufHandle a("A", {N}, kByte);
+ BufHandle b("B", {N}, kByte);
+ BufHandle c("C", {N}, kInt);
std::vector<uint8_t> a_buffer(N, 0);
std::vector<uint8_t> b_buffer(N, 128);
std::vector<int> c_buffer(N, 0);
}
TEST(LLVM, StoreFloat) {
- Placeholder result(BufHandle("result", {1}, kFloat));
+ BufHandle result("result", {1}, kFloat);
std::vector<float> result_buffer = {0.0f};
auto expr = result.store({0}, FloatImm::make(3.14f));
LLVMCodeGen cg(expr, {result});
});
LoopNest l({tensor});
StmtPtr stmt = l.root_stmt();
- Placeholder f_buf(BufHandle(tensor.buf()));
+ BufHandle f_buf(tensor.buf());
LLVMCodeGen cg(stmt, {f_buf});
PaddedBuffer<float> f_v(N, "f_v");
TEST(LLVM, ComputeMul) {
const int N = 1024;
- Placeholder a(BufHandle("a", {N}, kFloat));
- Placeholder b(BufHandle("b", {N}, kFloat));
+ BufHandle a("a", {N}, kFloat);
+ BufHandle b("b", {N}, kFloat);
Tensor c = Compute("c", {{N, "i"}}, [&](const VarHandle& i) {
return a.load(i) * b.load(i);
});
- Placeholder c_buf(BufHandle(c.buf()));
+ BufHandle c_buf(c.buf());
LoopNest l({c});
StmtPtr s = l.root_stmt();
TEST(LLVM, BroadcastAdd) {
const int M = 32;
const int N = 1024;
- Placeholder a(BufHandle("a", {M, N}, kFloat));
- Placeholder b(BufHandle("b", {N}, kFloat));
+ BufHandle a("a", {M, N}, kFloat);
+ BufHandle b("b", {N}, kFloat);
Tensor c = Compute(
"c", {{M, "i"}, {N, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
return a.load(i, j) + b.load(j);
});
- Placeholder c_buf(BufHandle(c.buf()));
+ BufHandle c_buf(c.buf());
LoopNest l({c});
l.prepareForCodegen();
StmtPtr s = l.root_stmt();
TEST(LLVM, DynamicShapeAdd) {
auto testWithSize = [](int32_t size) {
VarHandle n("n", kInt);
- Placeholder a(BufHandle("a", {n}, kFloat));
- Placeholder b(BufHandle("b", {n}, kFloat));
- Placeholder c(BufHandle("c", {n}, kFloat));
+ BufHandle a("a", {n}, kFloat);
+ BufHandle b("b", {n}, kFloat);
+ BufHandle c("c", {n}, kFloat);
VarHandle i("i", kInt);
StmtPtr s = For::make(i, 0, n, c.store({i}, a.load(i) + b.load(i)));
std::vector<float> aData(size, 1.0f);
TEST(LLVM, BindDynamicShapeAdd) {
auto testWithSize = [](int32_t size) {
VarHandle n("n", kInt);
- Placeholder a(BufHandle("a", {n}, kFloat));
- Placeholder b(BufHandle("b", {n}, kFloat));
- Placeholder c(BufHandle("c", {n}, kFloat));
+ BufHandle a("a", {n}, kFloat);
+ BufHandle b("b", {n}, kFloat);
+ BufHandle c("c", {n}, kFloat);
VarHandle i("i", kInt);
StmtPtr s = For::make(i, 0, n, c.store({i}, a.load(i) + b.load(i)));
std::vector<float> aData(size, 1.0f);
TEST(LLVM, TensorDynamicShapeAdd) {
auto testWithSize = [](int32_t size) {
VarHandle n("n", kInt);
- Placeholder a(BufHandle("a", {n}, kFloat));
- Placeholder b(BufHandle("b", {n}, kFloat));
+ BufHandle a("a", {n}, kFloat);
+ BufHandle b("b", {n}, kFloat);
Tensor c = Compute("c", {{n, "n"}}, [&](const VarHandle& i) {
return a.load(i) + b.load(i);
});
auto testWithSize = [](int32_t M, int32_t N) {
VarHandle m("m", kInt);
VarHandle n("n", kInt);
- Placeholder a(BufHandle("a", {m, n}, kFloat));
- Placeholder b(BufHandle("b", {m, n}, kFloat));
+ BufHandle a("a", {m, n}, kFloat);
+ BufHandle b("b", {m, n}, kFloat);
Tensor c = Compute(
"c", {{m, "m"}, {n, "n"}}, [&](const VarHandle& i, const VarHandle& j) {
return a.load(i, j) + b.load(i, j);
}
TEST(LLVM, EliminatedStmt) {
- Placeholder a(BufHandle("a", {1}, kFloat));
+ BufHandle a("a", {1}, kFloat);
Tensor c = Compute("c", {{0, "m"}}, [&](const VarHandle& m) { return m; });
int N = 64;
const int kTotalSize = M * N;
- Placeholder a("a", kFloat, {1, M, N});
+ BufHandle a("a", {1, M, N}, kFloat);
// TODO: why doesn't implicit vector<DimArg> work?
std::vector<DimArg> axis = {DimArg(1)};
int N = 64;
const int kTotalSize = M * N;
- Placeholder a("a", kFloat, {1, M, N});
+ BufHandle a("a", {1, M, N}, kFloat);
// TODO: why doesn't implicit vector<DimArg> work?
std::vector<DimArg> axis = {DimArg(1)};
int N = 64;
const int kTotalSize = M * N;
- Placeholder a("a", kFloat, {1, M, N});
+ BufHandle a("a", {1, M, N}, kFloat);
Tensor b = Reduce("sum", {{1, "K"}}, Sum(), a, {{M, "M"}, {N, "N"}});
LoopNest loopnest({b});
int N = 32;
int K = 48;
- Placeholder AP(BufHandle("A", {M, K}, kFloat));
- Placeholder BP(BufHandle("B", {K, N}, kFloat));
+ BufHandle AP("A", {M, K}, kFloat);
+ BufHandle BP("B", {K, N}, kFloat);
Tensor CT = Reduce(
"gemm",
{{M, "M"}, {N, "N"}},
TEST(LLVM, CallRaw) {
const int M = 32;
VarHandle N("N", kInt);
- Placeholder a(BufHandle("a", {M, N}, kFloat));
- Placeholder b(BufHandle("b", {N}, kFloat));
+ BufHandle a("a", {M, N}, kFloat);
+ BufHandle b("b", {N}, kFloat);
Tensor c = Compute(
"c", {{M, "i"}, {N, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
return a.load(i, j) + b.load(j);
TEST(LLVM, CustomTarget) {
constexpr int M = 16;
- Placeholder a("a", kFloat, {M});
- Placeholder b("b", kFloat, {M});
- Placeholder c("c", kFloat, {M});
+ BufHandle a("a", {M}, kFloat);
+ BufHandle b("b", {M}, kFloat);
+ BufHandle c("c", {M}, kFloat);
Tensor d = Compute("d", {{M, "m"}}, [&](const VarHandle& m) {
return a.load(m) * b.load(m) + c.load(m);
});
TEST(LoopNest, ExprSplitWithMask01) {
const int M = 26;
const int N = 5;
- Placeholder a_buf("a", kFloat, {M, N});
- Placeholder b_buf("b", kFloat, {M, N});
+ BufHandle a_buf("a", {M, N}, kFloat);
+ BufHandle b_buf("b", {M, N}, kFloat);
Tensor tensor = Compute(
"f", {{M, "m"}, {N, "n"}}, [&](const ExprHandle& m, const ExprHandle& n) {
return a_buf.load(m, n) + b_buf.load(m, n) + 1.0f;
// insert any masks.
TEST(LoopNest, ExprSplitWithMaskRepeatedNoMask) {
const int M = 64;
- Placeholder a_buf("a", kFloat, {M});
- Placeholder b_buf("b", kFloat, {M});
+ BufHandle a_buf("a", {M}, kFloat);
+ BufHandle b_buf("b", {M}, kFloat);
Tensor tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) {
return a_buf.load(m) + b_buf.load(m) + 1.0f;
});
TEST(LoopNest, TileSimple) {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
const int M = 64, N = 64;
- Placeholder a_buf("a", kFloat, {M, N});
- Placeholder b_buf("b", kFloat, {M, N});
+ BufHandle a_buf("a", {M, N}, kFloat);
+ BufHandle b_buf("b", {M, N}, kFloat);
Tensor tensor = Compute(
"f", {{M, "m"}, {N, "n"}}, [&](const ExprHandle& m, const ExprHandle& n) {
return a_buf.load({m, n}) + b_buf.load({m, n}) + 1.0f;
TEST(LoopNest, TileWithTails) {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
const int M = 64, N = 64;
- Placeholder a_buf("a", kFloat, {M, N});
- Placeholder b_buf("b", kFloat, {M, N});
+ BufHandle a_buf("a", {M, N}, kFloat);
+ BufHandle b_buf("b", {M, N}, kFloat);
Tensor tensor = Compute(
"f", {{M, "m"}, {N, "n"}}, [&](const ExprHandle& m, const ExprHandle& n) {
return a_buf.load({m, n}) + b_buf.load({m, n}) + 1.0f;
TEST(LoopNest, TileInMiddle) {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
const int M = 8, N = 8, L = 8, K = 8;
- Placeholder a_buf("a", kFloat, {M, N, L, K});
- Placeholder b_buf("b", kFloat, {M, N, L, K});
+ BufHandle a_buf("a", {M, N, L, K}, kFloat);
+ BufHandle b_buf("b", {M, N, L, K}, kFloat);
Tensor tensor = Compute(
"f",
{{M, "m"}, {N, "n"}, {L, "l"}, {K, "k"}},
TEST(LoopNest, SplitWithTailWithLoopOptions) {
const int M = 21;
- Placeholder a_buf("a", kFloat, {M});
- Placeholder b_buf("b", kFloat, {M});
+ BufHandle a_buf("a", {M}, kFloat);
+ BufHandle b_buf("b", {M}, kFloat);
Tensor tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) {
return a_buf.load(m) + b_buf.load(m) + 1.0f;
});
TEST(LoopNest, SplitWithMaskWithLoopOptions) {
const int M = 21;
- Placeholder a_buf("a", kFloat, {M});
- Placeholder b_buf("b", kFloat, {M});
+ BufHandle a_buf("a", {M}, kFloat);
+ BufHandle b_buf("b", {M}, kFloat);
Tensor tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) {
return a_buf.load(m) + b_buf.load(m) + 1.0f;
});
const int M = 4;
const int N = 5;
const int K = 6;
- Placeholder a_buf("a", kFloat, {M, N});
- Placeholder b_buf("b", kFloat, {N, K});
+ BufHandle a_buf("a", {M, N}, kFloat);
+ BufHandle b_buf("b", {N, K}, kFloat);
Tensor c = Compute(
"broadcast_add",
{{M, "m"}, {N, "n"}, {K, "k"}},
const int M = 4;
const int N = 5;
const int K = 6;
- Placeholder a_buf("a", kFloat, {M, N});
- Placeholder b_buf("b", kFloat, {N, K});
+ BufHandle a_buf("a", {M, N}, kFloat);
+ BufHandle b_buf("b", {N, K}, kFloat);
Tensor c = Compute(
"broadcast_add",
{{M, "m"}, {N, "n"}, {K, "k"}},
const int M = 4;
const int N = 5;
const int K = 6;
- Placeholder a_buf("a", kFloat, {M, N});
- Placeholder b_buf("b", kFloat, {N, K});
- Placeholder c_buf("c", kFloat, {M, N});
- Placeholder d_buf("d", kFloat, {M, K});
+ BufHandle a_buf("a", {M, N}, kFloat);
+ BufHandle b_buf("b", {N, K}, kFloat);
+ BufHandle c_buf("c", {M, N}, kFloat);
+ BufHandle d_buf("d", {M, K}, kFloat);
Tensor x = Compute(
"x",
const int M = 4;
const int N = 5;
const int K = 6;
- Placeholder a_buf("a", kFloat, {M, N});
- Placeholder b_buf("b", kFloat, {N, K});
- Placeholder c_buf("c", kFloat, {M, N});
- Placeholder d_buf("d", kFloat, {M, K});
+ BufHandle a_buf("a", {M, N}, kFloat);
+ BufHandle b_buf("b", {N, K}, kFloat);
+ BufHandle c_buf("c", {M, N}, kFloat);
+ BufHandle d_buf("d", {M, K}, kFloat);
Tensor x = Compute(
"x",
const int M = 4;
const int N = 5;
const int K = 6;
- Placeholder a_buf("a", kFloat, {M, N});
- Placeholder b_buf("b", kFloat, {N, K});
+ BufHandle a_buf("a", {M, N}, kFloat);
+ BufHandle b_buf("b", {N, K}, kFloat);
Tensor x = Compute(
"x",
const int kVectorCount = 128;
const int kTotalSize = kVectorSize * kVectorCount;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
Tensor b = Compute(
"f", {{kTotalSize, "i"}}, [&](const std::vector<VarHandle>& axes) {
const int kVectorCount = 128;
const int kTotalSize = kVectorSize * kVectorCount;
- Placeholder a(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder b(BufHandle("B", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder c(BufHandle("C", {ExprHandle(kTotalSize)}, kFloat));
- Placeholder d(BufHandle("D", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a("A", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle b("B", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle c("C", {ExprHandle(kTotalSize)}, kFloat);
+ BufHandle d("D", {ExprHandle(kTotalSize)}, kFloat);
Tensor e = Compute("e", {{kTotalSize, "i"}}, [&](const VarHandle& i) {
return a.load(i) + b.load(i);
auto testWithSize = [](int32_t M, int32_t N) {
VarHandle m("m", kInt);
VarHandle n("n", kInt);
- Placeholder a(BufHandle("a", {m, n}, kFloat));
- Placeholder b(BufHandle("b", {m, n}, kFloat));
+ BufHandle a("a", {m, n}, kFloat);
+ BufHandle b("b", {m, n}, kFloat);
Tensor c = Compute(
"c", {{m, "m"}, {n, "n"}}, [&](const VarHandle& i, const VarHandle& j) {
return a.load(i, j) + b.load(i, j);
int H = 256;
int R = 3;
int Pad = 1;
- Placeholder IP("input", kFloat, {H});
+ BufHandle IP("input", {H}, kFloat);
Tensor A =
Compute("A", {{N, "np"}, {H + 2 * Pad, "hp"}}, [&](Axis n, Axis h) {
});
LoopNest l({tensor});
- Placeholder extra(BufHandle("res", {6, 3}, kFloat));
+ BufHandle extra("res", {6, 3}, kFloat);
auto loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
VarHandle i = VarHandle(loops[0]->var());
- StmtPtr store_1 =
- Store::make(BufHandle(extra.data()), {i, 0}, ExprHandle(1.f));
- StmtPtr store_2 =
- Store::make(BufHandle(extra.data()), {i, 1}, ExprHandle(2.f));
+ StmtPtr store_1 = Store::make(extra, {i, 0}, ExprHandle(1.f));
+ StmtPtr store_2 = Store::make(extra, {i, 1}, ExprHandle(2.f));
// stmt 3 is the Function body.
- StmtPtr store_3 =
- Store::make(BufHandle(extra.data()), {i, 2}, ExprHandle(4.f));
+ StmtPtr store_3 = Store::make(extra, {i, 2}, ExprHandle(4.f));
loops[0]->body()->prepend_stmt(store_1);
loops[1]->body()->prepend_stmt(store_2);
[](const std::vector<VarHandle>&) { return -1; });
LoopNest l({c});
- Placeholder extra(BufHandle("extra", {5}, kInt));
+ BufHandle extra("extra", {5}, kInt);
auto loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0);
int j = 0;
// Add an increment at each layer of the loop which counts the number of
// times the loop executes.
LoadPtr load =
- alloc<Load>(extra.data(), std::vector<ExprPtr>({alloc<IntImm>(j)}));
+ alloc<Load>(extra.node(), std::vector<ExprPtr>({alloc<IntImm>(j)}));
AddPtr add = alloc<Add>(load, alloc<IntImm>(1));
StmtPtr store = alloc<Store>(
- extra.data(), std::vector<ExprPtr>({alloc<IntImm>(j)}), add);
+ extra.node(), std::vector<ExprPtr>({alloc<IntImm>(j)}), add);
if (prepend) {
l->body()->prepend_stmt(store);
}
const int M = 4;
const int N = 5;
const int K = 6;
- Placeholder a_buf("a", kFloat, {M, N});
- Placeholder b_buf("b", kFloat, {N, K});
- Placeholder c_buf("c", kFloat, {M, N});
- Placeholder d_buf("d", kFloat, {M, K});
+ BufHandle a_buf("a", {M, N}, kFloat);
+ BufHandle b_buf("b", {N, K}, kFloat);
+ BufHandle c_buf("c", {M, N}, kFloat);
+ BufHandle d_buf("d", {M, K}, kFloat);
Tensor x = Compute(
"x",
TEST(LoopNest, NormalizeAndSplitWithTail) {
// Create a dummy tensor to construct LoopNest.
ExprHandle n(100);
- Placeholder a(BufHandle("a", {n}, kFloat));
+ BufHandle a("a", {n}, kFloat);
Tensor b =
Compute("b", {{n, "i"}}, [&](const VarHandle& i) { return a.load(i); });
LoopNest l({b});
const int N = 7;
VarHandle m("m", kInt);
VarHandle n("n", kInt);
- Placeholder b(BufHandle("b", {m, n}, kFloat));
+ BufHandle b("b", {m, n}, kFloat);
Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}});
LoopNest loop({c});
HashProvider hasher;
TEST(LoopNest, DetectInlineRankMismatch) {
const int kTotalSize = 8;
- Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
+ BufHandle a_buf("A", {ExprHandle(kTotalSize)}, kFloat);
Tensor a = Compute("a", {{kTotalSize, "i"}}, [&](const VarHandle& i) {
return a_buf.load(i);
});
TEST(LoopNest, InlineConstantIndex) {
const int N = 10;
- Placeholder x_buf("a", kFloat, {1, N, 1});
+ BufHandle x_buf("a", {1, N, 1}, kFloat);
Tensor y = Compute(
"f",
{{1, "m"}, {N, "n"}, {1, "o"}},
ASSERT_EQ(hash_before, hash_after);
}
-static std::pair<std::unique_ptr<Placeholder>, Tensor> colReduce(int M, int N) {
-  auto a =
-      std::make_unique<Placeholder>("a", kFloat, std::vector<ExprHandle>{M, N});
+// Builds an M x N kFloat input buffer "a" and a column-wise sum reduction
+// over it (b[n] = sum over m of a(m, n)); returns the input handle together
+// with the reduction tensor so callers can bind and verify both.
+// NOTE(review): BufHandle is a lightweight value-semantic handle, so it is
+// returned by value here — no unique_ptr/std::move needed as with the old
+// Placeholder API.
+static std::pair<BufHandle, Tensor> colReduce(int M, int N) {
+  BufHandle a("a", {M, N}, kFloat);
Tensor t = Reduce(
"b",
{{N, "n"}},
Sum(),
-      [&](const VarHandle& n, const VarHandle& m) { return a->load(m, n); },
+      [&](const VarHandle& n, const VarHandle& m) { return a.load(m, n); },
{{M, "m"}});
-  return {std::move(a), t};
+  return {a, t};
}
static StmtPtr splitTailReorder(Tensor b) {
return nest.root_stmt();
}
-static void checkColReduce(StmtPtr s, Placeholder& p, Tensor t) {
+static void checkColReduce(StmtPtr s, BufHandle p, Tensor t) {
int M = immediateAs<int>(p.dim(0));
int N = immediateAs<int>(p.dim(1));
PaddedBuffer<float> a(M, N);
)IR";
torch::jit::testing::FileCheck().run(verification_pattern, oss.str());
- checkColReduce(s, *p.first, p.second);
+ checkColReduce(s, p.first, p.second);
}
TEST(LoopNest, ColReduceSplitTailUnevenReorder) {
)IR";
torch::jit::testing::FileCheck().run(verification_pattern, oss.str());
- checkColReduce(s, *p.first, p.second);
+ checkColReduce(s, p.first, p.second);
}
TEST(LoopNest, ColReduceSplitMaskEvenReorder) {
constexpr int M = 76, N = 128;
auto p = colReduce(M, N);
StmtPtr s = splitMaskReorder(p.second);
- checkColReduce(s, *p.first, p.second);
+ checkColReduce(s, p.first, p.second);
}
TEST(LoopNest, ColReduceSplitMaskUnevenReorder) {
constexpr int M = 76, N = 100;
auto p = colReduce(M, N);
StmtPtr s = splitMaskReorder(p.second);
- checkColReduce(s, *p.first, p.second);
+ checkColReduce(s, p.first, p.second);
}
TEST(LoopNest, ReorderAxisWithMultipleConds) {
TEST(LoopNest, VectorizeUse) {
constexpr int N = 8;
- Placeholder a("a", kFloat, {N});
+ BufHandle a("a", {N}, kFloat);
Tensor b = Compute(
"b", {{N, "n"}}, [&](const VarHandle& n) { return a.load(n) + 1.0f; });
Tensor c = Compute(
TEST(LoopNest, Int64Direct) {
constexpr int64_t N = 12;
- Placeholder a("a", kLong, {N});
- Placeholder b("b", kLong, {N});
+ BufHandle a("a", {N}, kLong);
+ BufHandle b("b", {N}, kLong);
VarHandle n("n", kLong);
StmtPtr s = For::make(
n, LongImm::make(0l), N, b.store({n}, a.load({n}) + LongImm::make(1l)));
TEST(LoopNest, Int64Compute) {
constexpr int64_t N = 12;
- Placeholder a("a", kLong, {N});
+ BufHandle a("a", {N}, kLong);
Tensor b = Compute("b", {{N, "n"}}, [&](const VarHandle& n) {
return a.load(n) + LongImm::make(1l);
});
*/
// Can determine if 2 loops created by Compute are dependent.
- Placeholder a_buf("a", kFloat, {4, 5});
- Placeholder b_buf("b", kFloat, {5, 6});
+ BufHandle a_buf("a", {4, 5}, kFloat);
+ BufHandle b_buf("b", {5, 6}, kFloat);
Tensor c = Compute(
"broadcast_add",
{{4, "m"}, {5, "n"}, {6, "k"}},
LoopNest l({d}, {c, d});
- MemDependencyChecker analyzer({a_buf.data(), b_buf.data()}, {d.buf()});
+ MemDependencyChecker analyzer({a_buf.node(), b_buf.node()}, {d.buf()});
l.root_stmt()->accept(&analyzer);
// Sanity test: Output depends on input.
- ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a_buf.data()));
- ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b_buf.data()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a_buf.node()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b_buf.node()));
// Second loop depends on first loop.
auto c_loop = l.getLoopStmtsFor(c)[0];
// Check inlining affects the number of accesses returned.
- Placeholder a_buf("a", kFloat, {4, 5});
- Placeholder b_buf("b", kFloat, {5, 6});
+ BufHandle a_buf("a", {4, 5}, kFloat);
+ BufHandle b_buf("b", {5, 6}, kFloat);
Tensor c = Compute(
"broadcast_add",
{{4, "m"}, {5, "n"}, {6, "k"}},
LoopNest l({d}, {c, d});
l.computeInline(c.buf());
- MemDependencyChecker analyzer({a_buf.data(), b_buf.data()}, {d.buf()});
+ MemDependencyChecker analyzer({a_buf.node(), b_buf.node()}, {d.buf()});
l.root_stmt()->accept(&analyzer);
// Sanity test: Output depends on input.
- ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a_buf.data()));
- ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b_buf.data()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a_buf.node()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b_buf.node()));
// broadcast_add tensor should not appear in trace at all.
for (auto& wi : analyzer.getHistory()) {
using namespace analysis;
// Split an axis, so the number of loops != the number of dimensions.
- Placeholder a_buf("a", kFloat, {4, 5});
- Placeholder b_buf("b", kFloat, {5, 6});
+ BufHandle a_buf("a", {4, 5}, kFloat);
+ BufHandle b_buf("b", {5, 6}, kFloat);
Tensor c = Compute(
"broadcast_add",
{{4, "m"}, {5, "n"}, {6, "k"}},
LoopNest l({c});
- MemDependencyChecker analyzer_before({a_buf.data(), b_buf.data()}, {c.buf()});
+ MemDependencyChecker analyzer_before({a_buf.node(), b_buf.node()}, {c.buf()});
l.root_stmt()->accept(&analyzer_before);
l.splitWithTail(l.getLoopStmtsFor(c)[0], 2);
- MemDependencyChecker analyzer_after({a_buf.data(), b_buf.data()}, {c.buf()});
+ MemDependencyChecker analyzer_after({a_buf.node(), b_buf.node()}, {c.buf()});
StmtPtr stmt = IRSimplifier::simplify(l.root_stmt());
stmt->accept(&analyzer_after);
using namespace analysis;
// Reorder an axis, so the loop order doesn't match the indexing order.
- Placeholder a_buf("a", kFloat, {4, 5});
- Placeholder b_buf("b", kFloat, {5, 6});
+ BufHandle a_buf("a", {4, 5}, kFloat);
+ BufHandle b_buf("b", {5, 6}, kFloat);
Tensor c = Compute(
"broadcast_add",
{{4, "m"}, {5, "n"}, {6, "k"}},
LoopNest l({c});
- MemDependencyChecker analyzer_before({a_buf.data(), b_buf.data()}, {c.buf()});
+ MemDependencyChecker analyzer_before({a_buf.node(), b_buf.node()}, {c.buf()});
l.root_stmt()->accept(&analyzer_before);
auto loops = l.getLoopStmtsFor(c);
l.reorderAxis(loops[0], loops[1]);
- MemDependencyChecker analyzer_after({a_buf.data(), b_buf.data()}, {c.buf()});
+ MemDependencyChecker analyzer_after({a_buf.node(), b_buf.node()}, {c.buf()});
StmtPtr stmt = IRSimplifier::simplify(l.root_stmt());
stmt->accept(&analyzer_after);
// Can determine dependencies of a Reduction.
- Placeholder a(BufHandle("a", {2, 3, 6}, kFloat));
- Placeholder b(BufHandle("b", {2, 3, 6}, kFloat));
+ BufHandle a("a", {2, 3, 6}, kFloat);
+ BufHandle b("b", {2, 3, 6}, kFloat);
Tensor c = Compute(
"scale",
Tensor d = Reduce("sum", {{2, "l1"}}, Sum(), c, {{3, "n1"}, {6, "m1"}});
LoopNest l({d}, {c, d});
- MemDependencyChecker analyzer({a.data(), b.data()}, {d.buf()});
+ MemDependencyChecker analyzer({a.node(), b.node()}, {d.buf()});
l.root_stmt()->accept(&analyzer);
// Sanity test: Output depends on input.
- ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a.data()));
- ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b.data()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a.node()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b.node()));
// Second loop depends on first loop.
auto c_loop = l.getLoopStmtsFor(c)[0];
// Reduction depends on both inputs.
auto reduces = NodeFinder<ReduceOp>::find(l.root_stmt());
- ASSERT_TRUE(analyzer.dependsIndirectly(reduces[0], a.data()));
- ASSERT_TRUE(analyzer.dependsIndirectly(reduces[0], b.data()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(reduces[0], a.node()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(reduces[0], b.node()));
}
TEST(MemDependency, MemDependencyCheckerComputeGEMM) {
int K = 2048;
using namespace analysis;
- Placeholder AP(BufHandle("A", {M, K}, kFloat));
- Placeholder BP(BufHandle("B", {K, N}, kFloat));
+ BufHandle AP("A", {M, K}, kFloat);
+ BufHandle BP("B", {K, N}, kFloat);
Tensor CT = Reduce(
"gemm",
{{M, "M"}, {N, "N"}},
stmt->accept(&analyzer_unlowered);
// Outputs depend on inputs.
- ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT.buf(), AP.data()));
- ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT.buf(), BP.data()));
+ ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT.buf(), AP.node()));
+ ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT.buf(), BP.node()));
// The last write to gemm should cover the total bound of the output.
std::shared_ptr<AccessInfo> outputAccess =
// Likewise the first read from each input cover the entire range of the
// input.
- auto aInput = analyzer_unlowered.input(AP.data());
- auto bInput = analyzer_unlowered.input(BP.data());
+ auto aInput = analyzer_unlowered.input(AP.node());
+ auto bInput = analyzer_unlowered.input(BP.node());
// A single dependent each.
ASSERT_EQ(aInput->dependents().size(), 1);
constexpr int M = 8;
constexpr int N = 16;
- Placeholder a("a", kFloat, {M, N});
- Tensor b = computeSum({a.handle(), dims, false}, c10::kFloat);
+ BufHandle a("a", {M, N}, kFloat);
+ Tensor b = computeSum({a, dims, false}, c10::kFloat);
auto cg = compile({a}, {b});
auto at = at::arange(M * N, at::kFloat).view({M, N});
TEST(Reductions, ReduceSum0D_1) {
const int M = 10;
- Placeholder b(BufHandle("b", {M}, kFloat));
+ BufHandle b("b", {M}, kFloat);
std::vector<float> in(M);
for (int j = 0; j < M; ++j) {
in[j] = j;
TEST(Reductions, ReduceSum0D_2) {
const int M = 10;
- Placeholder b(BufHandle("b", {}, kFloat));
+ BufHandle b("b", {}, kFloat);
std::vector<float> in(1);
in[0] = 77.7;
// Sum an array to a single value.
TEST(Reductions, ReduceSum1D) {
- Placeholder b(BufHandle("b", {10}, kFloat));
+ BufHandle b("b", {10}, kFloat);
std::vector<float> in(10);
for (int j = 0; j < 10; ++j) {
in[j] = j;
VarHandle m("m", kInt);
VarHandle n("n", kInt);
- Placeholder b(BufHandle("b", {m, n}, kFloat));
+ BufHandle b("b", {m, n}, kFloat);
std::vector<float> in(M * N);
for (int i = 0; i < M; ++i) {
for (int j = 0; j < N; ++j) {
const int M = 10;
VarHandle m("m", kInt);
- Placeholder b(BufHandle("b", {2, 3, m}, kFloat));
+ BufHandle b("b", {2, 3, m}, kFloat);
Tensor c = Reduce("sum", {{2, "l"}, {3, "n"}}, Sum(), b, {{m, "m"}});
LoopNest loop({c});
}
// This is the same as just reducing the original result across that axis.
- Placeholder c_buf(BufHandle(c.buf()));
+ BufHandle c_buf(c.buf());
Tensor e = Reduce("sum3", {{2, "l"}}, Sum(), c_buf, {{3, "m"}});
LoopNest loop3({e});
loop3.prepareForCodegen();
// Sum a large (10 D) Tensor 5 dimensions in.
TEST(Reductions, ReduceSum10D) {
- Placeholder in_(BufHandle("in_", {2, 3, 2, 3, 2, 3, 2, 3, 2, 3}, kFloat));
+ BufHandle in_("in_", {2, 3, 2, 3, 2, 3, 2, 3, 2, 3}, kFloat);
const int InputSize = 2 * 3 * 2 * 3 * 2 * 3 * 2 * 3 * 2 * 3;
- Placeholder out_(BufHandle("out_", {2, 3, 2, 3, 2}, kFloat));
+ BufHandle out_("out_", {2, 3, 2, 3, 2}, kFloat);
const int OutputSize = 2 * 3 * 2 * 3 * 2;
std::vector<float> in(InputSize, 1.f);
const int M = 4;
const int N = 4;
- Placeholder b(BufHandle("b", {M, N}, kFloat));
+ BufHandle b("b", {M, N}, kFloat);
std::vector<float> in(M * N);
for (int i = 0; i < M; ++i) {
for (int j = 0; j < N; ++j) {
// Maximum reductions.
TEST(Reductions, ReduceMax) {
- Placeholder in_(BufHandle("b", {10}, kFloat));
+ BufHandle in_("b", {10}, kFloat);
std::vector<float> in(10);
std::vector<float> out(1, -1.f);
ASSERT_EQ(out[0], 9);
- Placeholder in2_(BufHandle("b", {2, 5}, kFloat));
+ BufHandle in2_("b", {2, 5}, kFloat);
std::vector<float> out2(2, -1.f);
Tensor m2d = Reduce("max", {{2, "n"}}, Maximum(kFloat), in2_, {{5, "m"}});
// Minimum reduction, with custom initialization.
TEST(Reductions, ReduceMinCustomInitializer) {
VarHandle minInit("minInit", kFloat);
- Placeholder in_(BufHandle("b", {10}, kFloat));
+ BufHandle in_("b", {10}, kFloat);
std::vector<float> in(10);
std::vector<float> out(1, -1.f);
// TODO: this is very awkward without logical And/Or operators.
TEST(Reductions, ReduceAnyAll) {
VarHandle searchValue("searchValue", kInt);
- Placeholder b(BufHandle("b", {4, 10}, kInt));
+ BufHandle b("b", {4, 10}, kInt);
Reducer anyEqSV(ExprHandle(0), [](ExprHandle a, ExprHandle b) {
return CompareSelect::make(a, 1, 1, b, kEQ);
}
TEST(Reductions, ReduceMatmul2D) {
- Placeholder tA(BufHandle("tA", {3, 2}, kFloat));
- Placeholder tB(BufHandle("tB", {2, 3}, kFloat));
+ BufHandle tA("tA", {3, 2}, kFloat);
+ BufHandle tB("tB", {2, 3}, kFloat);
std::vector<float> tA_(6);
std::vector<float> tB_(6);
}
TEST(Reductions, ReduceRfactorLike) {
- Placeholder in(BufHandle("in", {10, 10}, kFloat));
+ BufHandle in("in", {10, 10}, kFloat);
std::vector<float> in_(100);
for (int i = 0; i < 100; ++i) {
in_[i] = i;
std::vector<float> out(1, -1.f);
Tensor l1 = Reduce("l1", {{10, "i"}}, Sum(), in, {{10, "j"}});
- Placeholder in_rf(BufHandle(l1.buf()));
+ BufHandle in_rf(l1.buf());
Tensor l2 = Reduce("l2", {}, Sum(), in_rf, {{10, "i"}});
const int M = 10;
VarHandle m("m", kInt);
- Placeholder a(BufHandle("a", {2, 3}, kFloat));
- Placeholder b(BufHandle("b", {2, 3, m}, kFloat));
+ BufHandle a("a", {2, 3}, kFloat);
+ BufHandle b("b", {2, 3, m}, kFloat);
Tensor c = Reduce("sum", {{2, "l1"}, {3, "n1"}}, Sum(), b, {{m, "m1"}});
Tensor d = Compute(
const int M = 10;
VarHandle m("m", kInt);
- Placeholder a(BufHandle("a", {2, 3, m}, kFloat));
- Placeholder b(BufHandle("b", {2, 3, m}, kFloat));
+ BufHandle a("a", {2, 3, m}, kFloat);
+ BufHandle b("b", {2, 3, m}, kFloat);
Tensor c = Compute(
"scale",
}
TEST(Reductions, SplitReduceAxis) {
- Placeholder in(BufHandle("in", {16, 8}, kFloat));
+ BufHandle in("in", {16, 8}, kFloat);
std::vector<float> in_(16 * 8);
for (int i = 0; i < 16; ++i) {
}
TEST(Reductions, SplitNonReduceAxis) {
- Placeholder in(BufHandle("in", {16, 8}, kFloat));
+ BufHandle in("in", {16, 8}, kFloat);
std::vector<float> in_(16 * 8);
for (int i = 0; i < 16; ++i) {
SumOp(c(k, n), 0, a(k, m, n), {m})
*/
- Placeholder in(BufHandle("in", {1, 12, 6}, kFloat));
+ BufHandle in("in", {1, 12, 6}, kFloat);
std::vector<float> in_(12 * 6, 1.f);
Tensor tensor_ = Reduce("sum", {{1, "k"}, {12, "n"}}, Sum(), in, {{6, "m"}});
VarHandle m("m", kInt);
VarHandle n("n", kInt);
- Placeholder b(BufHandle("b", {m, n}, kFloat));
+ BufHandle b("b", {m, n}, kFloat);
std::vector<float> in(M * N);
for (int j = 0; j < M * N; ++j) {
in[j] = j;
VarHandle n("n", kInt);
VarHandle k("k", kInt);
- Placeholder b(BufHandle("b", {m, n, k}, kFloat));
+ BufHandle b("b", {m, n, k}, kFloat);
std::vector<float> in(M * N * K);
for (int j = 0; j < M * N * K; ++j) {
in[j] = j;
VarHandle n("n", kInt);
VarHandle k("k", kInt);
- Placeholder b(BufHandle("b", {m, n, k}, kFloat));
+ BufHandle b("b", {m, n, k}, kFloat);
std::vector<float> in(M * N * K);
for (int j = 0; j < M * N * K; ++j) {
in[j] = j;
}
TEST(Reductions, ReduceRepeatedInternalRfactor) {
- Placeholder in_(BufHandle("in_", {2, 3, 4, 5, 6}, kFloat));
+ BufHandle in_("in_", {2, 3, 4, 5, 6}, kFloat);
const int InputSize = 2 * 3 * 4 * 5 * 6;
std::vector<float> in(InputSize, 1.f);
const int N = 10;
const int K = 10;
- Placeholder b(BufHandle("b", {M, N, K}, kFloat));
+ BufHandle b("b", {M, N, K}, kFloat);
std::vector<float> in(M * N * K);
for (int j = 0; j < M * N * K; ++j) {
in[j] = j;
const int M = 10;
const int N = 10;
const int K = 10;
- Placeholder b(BufHandle("b", {M, N, K}, kFloat));
+ BufHandle b("b", {M, N, K}, kFloat);
std::vector<float> in(M * N * K);
for (int j = 0; j < M * N * K; ++j) {
in[j] = j;
const int N = 10;
const int K = 10;
- Placeholder b(BufHandle("b", {M, N, K}, kFloat));
+ BufHandle b("b", {M, N, K}, kFloat);
std::vector<float> in(M * N * K);
for (int j = 0; j < M * N * K; ++j) {
in[j] = j;
const int N = 10;
const int K = 10;
- Placeholder b(BufHandle("b", {M, N, K}, kFloat));
+ BufHandle b("b", {M, N, K}, kFloat);
std::vector<float> in(M * N * K);
for (int j = 0; j < M * N * K; ++j) {
in[j] = j;
const int M = 10;
const int N = 10;
const int K = 10;
- Placeholder b(BufHandle("b", {M, N, K}, kFloat));
+ BufHandle b("b", {M, N, K}, kFloat);
std::vector<float> in(M * N * K);
for (int j = 0; j < M * N * K; ++j) {
in[j] = j;
const int N = 10;
const int K = 10;
- Placeholder b(BufHandle("b", {M, N, K}, kFloat));
+ BufHandle b("b", {M, N, K}, kFloat);
std::vector<float> in(M * N * K);
for (int j = 0; j < M * N * K; ++j) {
in[j] = j;
const int K = 10;
const int SPLIT_FACTOR = 4;
- Placeholder b(BufHandle("b", {M, N, K}, kFloat));
+ BufHandle b("b", {M, N, K}, kFloat);
std::vector<float> in(M * N * K);
for (int m = 0; m < M; ++m) {
for (int j = 0; j < N * K; ++j) {
const int K = 10;
const int SPLIT_FACTOR = 16;
- Placeholder b(BufHandle("b", {N, K}, kFloat));
+ BufHandle b("b", {N, K}, kFloat);
std::vector<float> in(N * K);
for (int j = 0; j < N * K; ++j) {
in[j] = j;
const int N = 5;
const int K = 6;
- Placeholder a_buf("a", kFloat, {M});
- Placeholder b_buf("b", kFloat, {M, N, K});
+ BufHandle a_buf("a", {M}, kFloat);
+ BufHandle b_buf("b", {M, N, K}, kFloat);
Tensor x = Reduce("x", {{M, "m1"}}, Sum(), b_buf, {{N, "n1"}, {K, "k1"}});
Tensor y = Compute("y", {{M, "m2"}}, [&](const VarHandle& m) {
const int N = 5;
const int K = 6;
- Placeholder a_buf("a", kFloat, {M, N, K});
- Placeholder b_buf("b", kFloat, {M, N, K});
+ BufHandle a_buf("a", {M, N, K}, kFloat);
+ BufHandle b_buf("b", {M, N, K}, kFloat);
Tensor x = Compute(
"x",
const int N = 5;
const int K = 6;
- Placeholder a_buf("a", kFloat, {M, N, K});
- Placeholder b_buf("b", kFloat, {M, N, K});
+ BufHandle a_buf("a", {M, N, K}, kFloat);
+ BufHandle b_buf("b", {M, N, K}, kFloat);
Tensor x = Compute(
"x",
int N = 3;
int M = 2;
- Placeholder a(BufHandle("a", {L, N, M}, kFloat));
- Placeholder b(BufHandle("b", {L, N, M}, kFloat));
+ BufHandle a("a", {L, N, M}, kFloat);
+ BufHandle b("b", {L, N, M}, kFloat);
Tensor c = Compute(
"scale",
int N = 3;
int M = 2;
- Placeholder a(BufHandle("a", {L, N, M}, kFloat));
- Placeholder b(BufHandle("b", {L, N, M}, kFloat));
+ BufHandle a("a", {L, N, M}, kFloat);
+ BufHandle b("b", {L, N, M}, kFloat);
Tensor c = Compute(
"scale",
int N = 3;
int M = 2;
- Placeholder a(BufHandle("a", {L, N, M}, kFloat));
- Placeholder b(BufHandle("b", {L, N, M}, kFloat));
+ BufHandle a("a", {L, N, M}, kFloat);
+ BufHandle b("b", {L, N, M}, kFloat);
Tensor c = Compute(
"scale",
}
TEST(Reductions, ReductionCacheBodyAccess) {
- Placeholder a(BufHandle("a", {24, 32, 12}, kFloat));
- Placeholder b(BufHandle("b", {24, 32, 12}, kFloat));
+ BufHandle a("a", {24, 32, 12}, kFloat);
+ BufHandle b("b", {24, 32, 12}, kFloat);
Tensor c = Compute(
"scale",
}
TEST(Reductions, ReductionCacheConsumerAccess) {
- Placeholder a(BufHandle("a", {24, 32, 12}, kFloat));
- Placeholder b(BufHandle("b", {24, 32, 12}, kFloat));
+ BufHandle a("a", {24, 32, 12}, kFloat);
+ BufHandle b("b", {24, 32, 12}, kFloat);
Tensor c = Compute(
"scale",
}
TEST(Reductions, ReductionSplitCacheConsumerAccess) {
- Placeholder a(BufHandle("a", {24, 32, 12}, kFloat));
- Placeholder b(BufHandle("b", {24, 32, 12}, kFloat));
+ BufHandle a("a", {24, 32, 12}, kFloat);
+ BufHandle b("b", {24, 32, 12}, kFloat);
Tensor c = Compute(
"scale",
}
TEST(Reductions, ReductionReorderCacheConsumerAccess) {
- Placeholder a(BufHandle("a", {24, 32, 12}, kFloat));
- Placeholder b(BufHandle("b", {24, 32, 12}, kFloat));
+ BufHandle a("a", {24, 32, 12}, kFloat);
+ BufHandle b("b", {24, 32, 12}, kFloat);
Tensor c = Compute(
"scale",
VarHandle n("n", kInt);
VarHandle k("k", kInt);
- Placeholder b(BufHandle("B", {m, n, k}, kFloat));
+ BufHandle b("B", {m, n, k}, kFloat);
std::vector<float> in(M * N * K);
for (int j = 0; j < M * N * K; ++j) {
in[j] = j;
VarHandle n("n", kInt);
VarHandle k("k", kInt);
- Placeholder b(BufHandle("B", {m, n, k}, kFloat));
+ BufHandle b("B", {m, n, k}, kFloat);
std::vector<float> in(M * N * K);
for (int j = 0; j < M * N * K; ++j) {
in[j] = j;
std::vector<float> out_before(8, -1.f);
std::vector<float> out_after(8, -1.f);
- Placeholder in(BufHandle("in", {8, 8}, kFloat));
+ BufHandle in("in", {8, 8}, kFloat);
Tensor tensor = Reduce("sum", {{8, "m"}}, Sum(), in, {{8, "n"}});
LoopNest l_before({tensor});
}
TEST(Reductions, ReductionVectorizeInner) {
- Placeholder in(BufHandle("in", {8, 8}, kFloat));
+ BufHandle in("in", {8, 8}, kFloat);
Tensor tensor = Reduce("sum", {{8, "m"}}, Sum(), in, {{8, "n"}});
LoopNest l({tensor});
std::vector<float> out_before(1, -1.f);
std::vector<float> out_after(1, -1.f);
- Placeholder in(BufHandle("in", {8, 8}, kFloat));
+ BufHandle in("in", {8, 8}, kFloat);
Tensor tensor = Reduce("sum", {}, Sum(), in, {{8, "m"}, {8, "n"}});
TEST(Reductions, InitFunction) {
constexpr int M = 32;
constexpr int N = 16;
- Placeholder A("A", kFloat, {M, N});
- Placeholder B("B", kFloat, {N});
+ BufHandle A("A", {M, N}, kFloat);
+ BufHandle B("B", {N}, kFloat);
Tensor C = Reduce(
"C",
{{N, "n"}},
TEST(Simplify, SimplifyForCleansUp) {
{
- Placeholder a("a", kFloat, {1, 12, 1});
+ BufHandle a("a", {1, 12, 1}, kFloat);
VarHandle x("x", kInt);
Tensor b = Compute(
// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
// b[n] = 1.f;
// }
constexpr int N = 8;
- Placeholder b("b", kFloat, {N});
+ BufHandle b("b", {N}, kFloat);
VarHandle n("n", kInt);
StmtPtr s = For::make(
n, 1, N, b.store({n}, CompareSelect::make(n, 1, 0.f, 1.0f, kLT)));
// b[n] = 1.f;
// }
constexpr int N = 8;
- Placeholder b("b", kFloat, {N});
+ BufHandle b("b", {N}, kFloat);
VarHandle n("n", kInt);
StmtPtr s =
For::make(n, 1, N, b.store({n}, IfThenElse::make(n < 1, 0.f, 1.0f)));
// for (int j = 1; j < 7; j++) {
// b[i, j] = 1.f;
constexpr int N = 8;
- Placeholder b("b", kFloat, {N, N});
+ BufHandle b("b", {N, N}, kFloat);
VarHandle i("i", kInt);
VarHandle j("j", kInt);
auto csel = CompareSelect::make(i, 1, kLT);
// b[i, j] = (b[i, j]) + 1.f;
constexpr int N = 8;
constexpr int K = 3;
- Placeholder a("a", kFloat, {N, N});
- Placeholder b("b", kFloat, {N, N});
+ BufHandle a("a", {N, N}, kFloat);
+ BufHandle b("b", {N, N}, kFloat);
VarHandle i("i", kInt);
VarHandle j("j", kInt);
auto csel = CompareSelect::make(i, 1, kLT);
wrap = std::make_shared<TEWrapper>();
auto N = VarHandle("N", kInt);
auto C = VarHandle("C", kFloat);
- Placeholder A("A", kFloat, {N});
+ BufHandle A("A", {N}, kFloat);
Tensor B = Compute("B", {N}, [&](const VarHandle& i) {
auto A_elem = [&]() {
auto elem = A.load(i);
}
wrap = std::make_shared<TEWrapper>();
auto N = VarHandle("N", kInt);
- Placeholder A("A", kFloat, {N});
+ BufHandle A("A", {N}, kFloat);
Tensor B = Compute("B", {N}, [&](const VarHandle& i) {
auto zero = FloatImm::make(0.f);
auto a = A.load(i);
}
wrap = std::make_shared<TEWrapper>();
auto N = VarHandle("N", kInt);
- Placeholder A("A", kFloat, {N});
+ BufHandle A("A", {N}, kFloat);
Tensor B = Compute("B", {N}, [&](const VarHandle& i) {
auto a = A.load(i);
return fast_tanh(a);
}
wrap = std::make_shared<TEWrapper>();
auto N = VarHandle("N", kInt);
- Placeholder A("A", kFloat, {N});
+ BufHandle A("A", {N}, kFloat);
Tensor B =
Compute("B", {N}, [&](const VarHandle& i) { return sigmoid(A.load(i)); });
// NNC uses sleef for vectorizing sigmoid, which comes in an 8-wide flavor
class CodeGen::BufferArg {
public:
- BufferArg(const Placeholder& buffer) : buf_(buffer.data()) {}
BufferArg(Tensor tensor) : buf_(tensor.buf()) {}
BufferArg(const VarHandle& var) : var_(var.node()), isVar_(true) {}
BufferArg(const BufHandle& buf) : buf_(buf.node()) {}
: dtype_(expr.dtype()) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::vector<BufferArg> buffer_args_extended = buffer_args;
- Placeholder ret_buf("ret_val", dtype_, {1});
+ BufHandle ret_buf("ret_val", {1}, dtype_);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
- std::vector<ExprPtr> indices;
- ExprPtr zero = alloc<IntImm>(0);
- for (size_t i = 0; i < ret_buf.data()->ndim(); i++) {
+ std::vector<ExprHandle> indices;
+ ExprHandle zero = IntImm::make(0);
+ for (size_t i = 0; i < ret_buf.ndim(); i++) {
indices.push_back(zero);
}
- // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
- StmtPtr store_stmt =
- // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
- alloc<Store>(ret_buf.data(), indices, expr.node());
+ StmtPtr store_stmt = Store::make(ret_buf, indices, expr);
buffer_args_extended.emplace_back(ret_buf);
codegen_.reset(new CodeGenType(store_stmt, buffer_args_extended));
}
template <typename T>
inline ExprHandle load(const std::vector<T>& args) const;
+ inline ExprHandle load(const std::vector<ExprHandle>& args) const;
+
+ StorePtr store(const std::vector<ExprHandle>& args, const ExprHandle& val)
+ const;
+
bool operator==(const BufHandle& other) const {
return this->node() == other.node();
}
using VarPtr = NodePtr<Var>;
class ExprHandle;
+class VarHandle;
+class BufHandle;
class Add;
class And;
buf.node(), ExprHandleVectorToExprVector(indices), value.node());
}
+StorePtr BufHandle::store(
+ const std::vector<ExprHandle>& args,
+ const ExprHandle& value) const {
+ return Store::make(*this, args, value);
+}
+
ExprPtr flatten_index(
const std::vector<ExprPtr>& dims,
const std::vector<ExprPtr>& indices) {
}
}
-class Placeholder;
-
class TORCH_API Cast : public ExprNode<Cast> {
public:
ExprPtr src_value() const {
throw malformed_input(msg);
}
if (isContiguous(input)) {
- Placeholder inBuffer(
+ BufHandle inBuffer(
"t" + input_name_map_[input],
- ToDtype(static_cast<ScalarType>(*tt->scalarType())),
- toExprHandles(*tt->sizes().concrete_sizes()));
- bufs_.emplace(input, inBuffer.data());
+ toExprHandles(*tt->sizes().concrete_sizes()),
+ ToDtype(static_cast<ScalarType>(*tt->scalarType())));
+ bufs_.emplace(input, inBuffer.node());
bufferArgs_.emplace_back(inBuffer);
break;
}
- Placeholder inBuffer(
+ BufHandle inBuffer(
"t" + input_name_map_[input],
- ToDtype(static_cast<ScalarType>(*tt->scalarType())),
- {0});
+ {0},
+ ToDtype(static_cast<ScalarType>(*tt->scalarType())));
std::vector<DimArg> inputTensorDims;
for (size_t i = 0; i < *tt->sizes().size(); i++) {
auto const size = *tt->sizes()[i];
Reducer(ExprHandle init, ReduceInteraction& interaction)
: init_(init.node()), interaction_(interaction) {}
- Reducer(ExprHandle init, ReduceInteraction& interaction, Placeholder& buf)
- : init_(init.node()), interaction_(interaction) {}
-
template <typename RI>
Reducer(ExprHandle init, RI interaction) : init_(init.node()) {
interaction_ = interaction;
namespace jit {
namespace tensorexpr {
-class Placeholder;
-
// The common base between all statement node.
class TORCH_API Stmt : public std::enable_shared_from_this<Stmt> {
public:
const std::string& name,
const std::vector<DimArg>& dim_args,
const Reducer& reducer,
- const Placeholder& buffer,
- const std::vector<DimArg>& reduce_args) {
- return Reduce(
- name,
- dim_args,
- reducer,
- [&](ParameterList& p) { return buffer.load(p); },
- reduce_args);
-}
-
-Tensor Reduce(
- const std::string& name,
- const std::vector<DimArg>& dim_args,
- const Reducer& reducer,
const BufHandle& buffer,
const std::vector<DimArg>& reduce_args) {
return Reduce(
StmtPtr stmt_;
};
-// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
-class Placeholder {
- public:
- Placeholder() = default;
-
- // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
- Placeholder(const BufHandle& data) : data_(data.node()) {
- if (data_->base_handle()->dtype() != kHandle) {
- throw malformed_input("Placeholder dtype must be Handle");
- }
-
- // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
- std::vector<ExprHandle> stride_handles(ndim());
- for (int i = (int)ndim() - 1; i >= 0; i--) {
- // NOLINTNEXTLINE(bugprone-branch-clone)
- if (i == ndim() - 1) {
- stride_handles[i] = 1;
- } else {
- stride_handles[i] = stride_handles[i + 1] * ExprHandle(dim(i + 1));
- }
- }
- strides_ = ExprHandleVectorToExprVector(stride_handles);
- }
-
- // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
- Placeholder(
- const std::string& name,
- const Dtype& dtype,
- const std::vector<ExprHandle>& dims)
- : Placeholder(BufHandle(name, dims, dtype)) {}
-
- // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
- Placeholder(const std::vector<ExprHandle>& dims, const Dtype& dtype)
- : Placeholder(BufHandle("_", dims, dtype)) {}
-
- // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
- explicit Placeholder(const std::vector<ExprHandle>& dims)
- : Placeholder(BufHandle("_", dims, kFloat)) {}
-
- BufPtr data() const {
- return data_;
- }
- BufHandle handle() const {
- return BufHandle(data());
- }
- Dtype dtype() const {
- return data_->dtype();
- }
- int ndim() const {
- return data_->ndim();
- }
- ExprPtr dim(int index) const {
- return data_->dim(index);
- }
- std::vector<ExprPtr> dims() const {
- return data_->dims();
- }
-
- template <typename... Ts>
- inline ExprHandle load(const Ts&... ts) const;
-
- template <typename T>
- inline ExprHandle load(const std::vector<T>& args) const;
-
- inline ExprHandle load(const std::vector<ExprHandle>& args) const;
-
- inline StorePtr store(
- const std::vector<ExprHandle>& args,
- const ExprHandle& val) const {
- return alloc<Store>(data(), ExprHandleVectorToExprVector(args), val.node());
- }
-
- private:
- BufPtr data_;
- std::vector<ExprPtr> strides_;
-};
-
TORCH_API Tensor Compute(
const std::string& func_name,
const std::vector<DimArg>& dim_args,
return Reduce(func_name, dim_args, reducer, body_func, reduce_args);
}
-// Overload for the common case of all dimensions of a Placeholder.
-TORCH_API Tensor Reduce(
- const std::string& func_name,
- const std::vector<DimArg>& dim_args,
- const Reducer& reducer,
- const Placeholder& buffer,
- const std::vector<DimArg>& reduce_args);
-
TORCH_API Tensor Reduce(
const std::string& name,
const std::vector<DimArg>& dim_args,
}
template <typename... Ts>
-inline ExprHandle Placeholder::load(const Ts&... ts) const {
+inline ExprHandle BufHandle::load(const Ts&... ts) const {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::vector<ExprHandle> params({ExprHandle(ts)...});
- return ExprHandle(alloc<Load>(data(), ExprHandleVectorToExprVector(params)));
+ return ExprHandle(alloc<Load>(node(), ExprHandleVectorToExprVector(params)));
}
template <typename T>
-inline ExprHandle Placeholder::load(const std::vector<T>& args) const {
+inline ExprHandle BufHandle::load(const std::vector<T>& args) const {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::vector<ExprHandle> params(args.begin(), args.end());
- return ExprHandle(alloc<Load>(data(), ExprHandleVectorToExprVector(params)));
+ return ExprHandle(alloc<Load>(node(), ExprHandleVectorToExprVector(params)));
}
-inline ExprHandle Placeholder::load(const std::vector<ExprHandle>& args) const {
+inline ExprHandle BufHandle::load(const std::vector<ExprHandle>& args) const {
return this->template load<ExprHandle>(args);
}
-template <typename... Ts>
-inline ExprHandle BufHandle::load(const Ts&... ts) const {
- // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
- std::vector<ExprHandle> params({ExprHandle(ts)...});
- return Load::make(*this, params);
-}
-
-template <typename T>
-inline ExprHandle BufHandle::load(const std::vector<T>& args) const {
- // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
- std::vector<ExprHandle> params(args.begin(), args.end());
- return Load::make(*this, params);
-}
-
} // namespace tensorexpr
} // namespace jit
} // namespace torch
using namespace torch::jit::tensorexpr;
ArgValue convertPyToArgValue(py::handle inp) {
- if (py::isinstance<Placeholder>(inp)) {
- return py::cast<Placeholder>(inp).handle();
- } else if (py::isinstance<BufHandle>(inp)) {
+ if (py::isinstance<BufHandle>(inp)) {
return py::cast<BufHandle>(inp);
} else if (py::isinstance<VarHandle>(inp)) {
return py::cast<VarHandle>(inp);
const std::vector<ExprHandle>& args,
const ExprHandle& val) { return Store::make(self, args, val); });
- py::class_<Placeholder>(te, "Placeholder")
- .def(py::init<
- const std::string&,
- const Dtype&,
- const std::vector<ExprHandle>&>())
- .def(py::init<const std::vector<ExprHandle>&, const Dtype&>())
- .def(py::init<const std::vector<ExprHandle>&>())
- .def(
- "load",
- [](Placeholder& self, const std::vector<ExprHandle>& v) {
- return self.load(v);
- })
- .def(
- "store",
- [](Placeholder& self,
- const std::vector<ExprHandle>& args,
- const ExprHandle& val) { return self.store(args, val); })
- .def("data", [](Placeholder& self) { return BufHandle(self.data()); });
py::class_<Tensor>(te, "Tensor")
.def(
py::init([](BufHandle& b, StmtPtr s) { return Tensor(b.node(), s); }))
return Reduce(func_name, dim_args, reducer, buffer, reduce_args);
},
py::return_value_policy::reference);
- te.def(
- "Reduce",
- [](const std::string& func_name,
- const std::vector<DimArg>& dim_args,
- const Reducer& reducer,
- const Placeholder& buffer,
- const std::vector<DimArg>& reduce_args) {
- return Reduce(func_name, dim_args, reducer, buffer, reduce_args);
- },
- py::return_value_policy::reference);
te.def(
"Reduce",
#endif
py::class_<CodeGen::BufferArg>(te, "BufferArg")
- .def(py::init<const Placeholder&>())
.def(py::init<Tensor>())
.def(py::init<const VarHandle&>())
.def(py::init<const BufHandle&>());
- py::implicitly_convertible<Placeholder, CodeGen::BufferArg>();
py::implicitly_convertible<Tensor, CodeGen::BufferArg>();
py::implicitly_convertible<VarHandle, CodeGen::BufferArg>();
py::implicitly_convertible<BufHandle, CodeGen::BufferArg>();