using namespace torch::jit;
using namespace torch::jit::tensorexpr;
-void vectorize(tensorexpr::LoopNest* ln, tensorexpr::Tensor* target, int width) {
+void vectorize(tensorexpr::LoopNest* ln, tensorexpr::Tensor target, int width) {
auto loops = ln->getLoopStmtsFor(target);
ForPtr inner, tail;
ln->splitWithTail(loops[0], width, &inner, &tail);
ln->vectorize(inner);
}
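// (Editorial sketch, not part of the original diff.) A minimal call site for
// the helper above; the size 1024 and the width 8 are assumed values. With
// Tensor now a value type, `target` is passed by value rather than through a
// pointer:
static void vectorizeExample() {
  KernelScope ks;
  Placeholder A("A", kFloat, {1024});
  Tensor B = Compute("B", {{1024, "i"}}, [&](const VarHandle& i) {
    return A.load(i) * 2.0f;
  });
  LoopNest ln({B});
  vectorize(&ln, B, 8); // split B's loop by 8, then vectorize the inner loop
  ln.prepareForCodegen();
}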
-void optimizePointwise(tensorexpr::LoopNest* ln, tensorexpr::Tensor* target) {
+void optimizePointwise(tensorexpr::LoopNest* ln, tensorexpr::Tensor target) {
std::vector<ForPtr> loops = ln->getLoopStmtsFor(target);
ForPtr inner, tail;
ln->splitWithTail(loops[0], 16 * 8, &inner, &tail);
auto N = VarHandle("N", kInt);
Placeholder A("A", kFloat, {N});
auto clamp = 0;
- torch::jit::tensorexpr::Tensor* B = Compute("B", {N}, [&](const VarHandle& i){
+ torch::jit::tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i){
auto A_elem = [&]() {
auto elem = A.load(i);
auto min = FloatImm::make(clamp);
KernelScope ks;
auto N = VarHandle("N", kInt);
Placeholder A("A", kFloat, {N});
- torch::jit::tensorexpr::Tensor* B =
+ torch::jit::tensorexpr::Tensor B =
Compute("B", {N}, [&](const VarHandle& i) {
return log(A.load(i));
});
KernelScope ks;
auto N = VarHandle("N", kInt);
Placeholder A("A", kFloat, {N});
- torch::jit::tensorexpr::Tensor* B =
+ torch::jit::tensorexpr::Tensor B =
Compute("B", {N}, [&](const VarHandle& i) {
return fast_log(A.load(i));
});
KernelScope ks;
auto N = VarHandle("N", kInt);
Placeholder A("A", kFloat, {N});
- torch::jit::tensorexpr::Tensor* B =
+ torch::jit::tensorexpr::Tensor B =
Compute("B", {N}, [&](const VarHandle& i) {
return log_vml(A.load(i));
});
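// (Editorial sketch, not part of the original diff.) Lowering and
// interpreting one of the log kernels above end to end; the concrete size 8
// stands in for N. The same steps apply to log, fast_log, and log_vml, which
// differ only in the elementwise op inside Compute:
static void runLogExample() {
  KernelScope ks;
  Placeholder A("A", kFloat, {8});
  Tensor B = Compute(
      "B", {{8, "i"}}, [&](const VarHandle& i) { return log(A.load(i)); });
  LoopNest ln({B});
  ln.prepareForCodegen();
  StmtPtr s = IRSimplifier::simplify(ln.root_stmt());
  std::vector<float> a_v(8, 1.0f), b_v(8, 0.0f);
  std::vector<void*> args({a_v.data(), b_v.data()});
  SimpleIREvaluator eval(s, {A, BufHandle(B.buf())});
  eval.call_raw(args); // b_v[i] == log(a_v[i]) == 0.0f
}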
auto N = VarHandle("N", kInt);
Placeholder A("A", kFloat, {N});
auto clamp = 1e-6f;
- tensorexpr::Tensor* B = Compute("B", {N}, [&](const VarHandle& i) {
+ tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) {
auto A_elem = [&]() {
auto elem = A.load(i);
auto min = FloatImm::make(clamp);
KernelScope ks;
auto N = VarHandle("N", kInt);
Placeholder A("A", kFloat, {N});
- torch::jit::tensorexpr::Tensor* B =
+ torch::jit::tensorexpr::Tensor B =
Compute("B", {N}, [&](const VarHandle& i) {
return fast_tanh(A.load(i));
});
VarHandle eps("eps", kFloat);
using axis = const VarHandle&;
- Tensor* output = Compute(
+ Tensor output = Compute(
"output",
{{N_, "N"}, {C_, "C"}, {H_, "H"}, {W_, "W"}},
[&](axis n, axis c, axis h, axis w) {
te::KernelScope ks;
te::VarHandle n("n", te::kInt);
te::Placeholder A(te::BufHandle("A", {N}, te::kFloat));
- te::Tensor* relu = te::Compute("relu", {{n, "n"}}, [&](const te::VarHandle& i) {
+ te::Tensor relu = te::Compute("relu", {{n, "n"}}, [&](const te::VarHandle& i) {
return te::Max::make(A.load(i), 0.f, false);
});
- te::Tensor* min6 = te::Compute("min6", {{n, "n"}}, [&](const te::VarHandle& i) {
- return te::Min::make(relu->load(i), 6.f, false);
+ te::Tensor min6 = te::Compute("min6", {{n, "n"}}, [&](const te::VarHandle& i) {
+ return te::Min::make(relu.load(i), 6.f, false);
});
- te::Tensor* plus3 = te::Compute("plus3", {{n, "n"}}, [&](const te::VarHandle& i) {
- return min6->load(i) + 3.f;
+ te::Tensor plus3 = te::Compute("plus3", {{n, "n"}}, [&](const te::VarHandle& i) {
+ return min6.load(i) + 3.f;
});
- te::Tensor* times = te::Compute("times", {{n, "n"}}, [&](const te::VarHandle& i) {
- return A.load(i) * plus3->load(i);
+ te::Tensor times = te::Compute("times", {{n, "n"}}, [&](const te::VarHandle& i) {
+ return A.load(i) * plus3.load(i);
});
- te::Tensor* sixth = te::Compute("sixth", {{n, "n"}}, [&](const te::VarHandle& i) {
- return times->load(i) * 1.f / 6.f;
+ te::Tensor sixth = te::Compute("sixth", {{n, "n"}}, [&](const te::VarHandle& i) {
+ return times.load(i) * 1.f / 6.f;
});
te::LoopNest nest({sixth}, {relu, min6, plus3, times, sixth});
for (auto tensor : {relu, min6, plus3, times}) {
- nest.computeInline(tensor->buf());
+ nest.computeInline(tensor.buf());
}
nest.prepareForCodegen();
te::StmtPtr s = te::IRSimplifier::simplify(nest.root_stmt());
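// (Editorial sketch, not part of the original diff.) After the four
// intermediates are inlined, `s` is a single fused loop computing sixth[i]
// directly from A[i]. Assuming N is a concrete int here, the fused kernel
// can be interpreted in place:
std::vector<float> a_v(N, 1.0f), out_v(N, 0.0f);
std::vector<void*> args({a_v.data(), out_v.data()});
te::SimpleIREvaluator eval(s, {A, te::BufHandle(sixth.buf())});
eval.call_raw(args);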
{input_sizes_[i][0], input_sizes_[i][1]}));
}
- Tensor* output = Compute(
+ Tensor output = Compute(
"aten_cat",
{{output_size_[0], "M"}, {output_size_[1], "N"}},
[&](const VarHandle& m, const VarHandle& n) {
for_stmts[i] = for_st;
cumulative_input_sizes += input_sizes_[i][1];
}
- auto output = new Tensor(output_buf, alloc<Block>(for_stmts));
+ auto output = Tensor(output_buf, alloc<Block>(for_stmts));
LoopNest nest({output});
nest.prepareForCodegen();
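// (Editorial note.) With Tensor as a value type, lowering code like this
// builds the result in place from a Buf plus the Stmt that writes it
// (Tensor(output_buf, ...)) instead of heap-allocating one with `new`.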
te::Placeholder AP(te::BufHandle("A", {M, K}, te::kFloat));
te::Placeholder BP(te::BufHandle("B", {K, N}, te::kFloat));
- te::Tensor* CT = te::Reduce(
+ te::Tensor CT = te::Reduce(
"gemm",
{{M, "M"}, {N, "N"}},
te::Sum(),
}
{
auto const& loops = loop.getLoopStmtsFor(CT);
- loop.cacheAccesses(CT->buf(), "C_regs", loops[2]);
+ loop.cacheAccesses(CT.buf(), "C_regs", loops[2]);
}
loop.prepareForCodegen();
KernelScope kernel_scope;
Placeholder a_buf("a", kFloat, {M});
Placeholder b_buf("b", kFloat, {M});
- Tensor* c_tensor = Compute(
+ Tensor c_tensor = Compute(
"c", {{M, "m"}}, [&](const VarHandle& m) {
return a_buf.load(m) + b_buf.load(m);
});
int M = A.numel();
te::Placeholder AP(te::BufHandle("A", {M}, te::kFloat));
- te::Tensor* BT = te::Reduce(
+ te::Tensor BT = te::Reduce(
"reduce_full",
{{1, "N"}},
te::Sum(),
TORCH_CHECK(M % kChunkSize == 0);
te::Placeholder AP(te::BufHandle("A", {M}, te::kFloat));
- te::Tensor* BT = te::Reduce(
+ te::Tensor BT = te::Reduce(
"reduce_full",
{},
te::Sum(),
loop.reorderAxis(mo, mi);
loops = loop.getLoopStmtsFor(BT);
- auto bt_body = loop.getAllWritesToBuf(BT->buf())[1];
+ auto bt_body = loop.getAllWritesToBuf(BT.buf())[1];
TORCH_CHECK(loop.rfactor(bt_body, loops.at(0), &rfac_buf));
loop.reorderAxis(loops.at(0), loops.at(1));
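// (Editorial note.) rfactor splits the reduction in two: partial sums are
// accumulated into rfac_buf along the chosen loop axis and then combined
// into BT, which makes it legal to vectorize or parallelize that axis.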
const int kChunkSize = 8;
te::Placeholder a("A", te::kFloat, {M});
- te::Tensor* b = te::computeSum({a.handle(), te::IntList({0}), false}, at::kFloat);
+ te::Tensor b = te::computeSum({a.handle(), te::IntList({0}), false}, at::kFloat);
te::LoopNest nest({b});
auto loops = nest.getLoopStmtsFor(b);
te::KernelScope ks;
constexpr int kCacheSize = 1 << 12;
te::Placeholder a("A", te::kFloat, {M, N});
- te::Tensor* b = te::computeSum({a.handle(), te::IntList({0}), false}, at::kFloat);
+ te::Tensor b = te::computeSum({a.handle(), te::IntList({0}), false}, at::kFloat);
te::LoopNest nest({b});
auto sch = state.range(2);
te::KernelScope ks;
constexpr int kChunkSize = 8;
te::Placeholder a("A", te::kFloat, {M, N});
- te::Tensor* b = te::computeSum({a.handle(), te::IntList({1}), false}, at::kFloat);
+ te::Tensor b = te::computeSum({a.handle(), te::IntList({1}), false}, at::kFloat);
te::LoopNest nest({b});
auto sch = state.range(2);
using namespace torch::indexing;
namespace te = torch::jit::tensorexpr;
-static void vectorize(te::LoopNest* ln, te::Tensor* target, int width) {
+static void vectorize(te::LoopNest* ln, te::Tensor target, int width) {
auto loops = ln->getLoopStmtsFor(target);
te::ForPtr inner, tail;
ln->splitWithTail(loops[0], width, &inner, &tail);
te::KernelScope ks;
te::VarHandle N("N", te::kInt);
te::Placeholder A("A", te::kFloat, {N});
- te::Tensor* B = te::Compute(
+ te::Tensor B = te::Compute(
"B", {N}, [&](const te::VarHandle& i) { return log_vml(A.load(i)); });
te::LoopNest ln({B});
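// (Editorial sketch, not part of the original diff.) The static helper
// defined above now takes the tensor by value, so it can be applied
// directly; the width 8 is an assumed choice:
vectorize(&ln, B, 8);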
KernelScope kernel_scope;
ExprHandle n(100);
Placeholder a(BufHandle("a", {n}, kFloat));
- Tensor* b =
+ Tensor b =
Compute("b", {{n, "i"}}, [&](const VarHandle& i) { return a.load(i); });
LoopNest l({b});
auto bounds_info = inferBounds(l.root_stmt());
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{0, 99}});
- ASSERT_EQ(bounds_info.at(b->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 99}});
+ ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 99}});
}
TEST(BoundsInference, _2) {
KernelScope kernel_scope;
VarHandle n("n", kInt);
Placeholder a(BufHandle("a", {n}, kFloat));
- Tensor* b =
+ Tensor b =
Compute("b", {{n, "i"}}, [&](const VarHandle& i) { return a.load(i); });
LoopNest l({b});
auto bounds_info = inferBounds(l.root_stmt());
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{0, -1}});
- ASSERT_EQ(bounds_info.at(b->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(b->buf())[0], {{0, -1}});
+ ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(b.buf())[0], {{0, -1}});
}
TEST(BoundsInference, _3) {
KernelScope kernel_scope;
ExprHandle n(100);
Placeholder a(BufHandle("a", {n + 10}, kFloat));
- Tensor* b = Compute("b", {{n, "i"}}, [&](const VarHandle& i) {
+ Tensor b = Compute("b", {{n, "i"}}, [&](const VarHandle& i) {
return a.load(i) * a.load(i + 10);
});
LoopNest l({b});
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{0, 109}});
- ASSERT_EQ(bounds_info.at(b->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 99}});
+ ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 99}});
}
TEST(BoundsInference, _4) {
ExprHandle W(320);
ExprHandle H(200);
Placeholder a(BufHandle("a", {H, W}, kFloat));
- Tensor* b = Compute(
+ Tensor b = Compute(
"b", {{H, "y"}, {W, "x"}}, [&](const VarHandle& y, const VarHandle& x) {
return x * y;
});
- Tensor* c = Compute(
+ Tensor c = Compute(
"c", {{H, "y"}, {W, "x"}}, [&](const VarHandle& y, const VarHandle& x) {
- return a.load(y, x) * b->load(y, x);
+ return a.load(y, x) * b.load(y, x);
});
LoopNest l({c});
std::vector<ForPtr> loops = l.getLoopStmtsFor(c);
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{0, 199}, {0, 319}});
- ASSERT_EQ(bounds_info.at(b->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 199}, {0, 319}});
+ ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 199}, {0, 319}});
- ASSERT_EQ(bounds_info.at(c->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(c->buf())[0], {{0, 199}, {0, 319}});
+ ASSERT_EQ(bounds_info.at(c.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(c.buf())[0], {{0, 199}, {0, 319}});
}
{
// Infer bounds on the inner loop scope
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{-1, -1}, {0, 319}});
- ASSERT_EQ(bounds_info.at(b->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(b->buf())[0], {{-1, -1}, {0, 319}});
+ ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(b.buf())[0], {{-1, -1}, {0, 319}});
- ASSERT_EQ(bounds_info.at(c->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(c->buf())[0], {{-1, -1}, {0, 319}});
+ ASSERT_EQ(bounds_info.at(c.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(c.buf())[0], {{-1, -1}, {0, 319}});
}
{
// Infer bounds on the inner loop body's scope
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{-1, -1}, {-1, -1}});
- ASSERT_EQ(bounds_info.at(b->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(b->buf())[0], {{-1, -1}, {-1, -1}});
+ ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(b.buf())[0], {{-1, -1}, {-1, -1}});
- ASSERT_EQ(bounds_info.at(c->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(c->buf())[0], {{-1, -1}, {-1, -1}});
+ ASSERT_EQ(bounds_info.at(c.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(c.buf())[0], {{-1, -1}, {-1, -1}});
}
}
KernelScope kernel_scope;
ExprHandle n(100);
Placeholder a(BufHandle("a", {n}, kFloat));
- Tensor* b =
+ Tensor b =
Compute("b", {{n, "i"}}, [&](const VarHandle& i) { return a.load(i); });
LoopNest l({b});
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{0, 95}});
- ASSERT_EQ(bounds_info.at(b->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 95}});
+ ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 95}});
}
{
// Verify inferred bounds for the tail loop
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{96, 99}});
- ASSERT_EQ(bounds_info.at(b->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(b->buf())[0], {{96, 99}});
+ ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(b.buf())[0], {{96, 99}});
}
}
ExprHandle CW(32);
ExprHandle CH(20);
Placeholder a(BufHandle("a", {H, W}, kFloat));
- Tensor* b = Compute(
+ Tensor b = Compute(
"b", {{H, "y"}, {W, "x"}}, [&](const VarHandle& y, const VarHandle& x) {
return x * y;
});
- Tensor* c = Compute(
+ Tensor c = Compute(
"c", {{CH, "y"}, {CW, "x"}}, [&](const VarHandle& y, const VarHandle& x) {
- return a.load(y + 100, x + 100) * b->load(y * 2, x * 5);
+ return a.load(y + 100, x + 100) * b.load(y * 2, x * 5);
});
LoopNest l({c});
std::vector<ForPtr> loops = l.getLoopStmtsFor(c);
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{100, 119}, {100, 131}});
- ASSERT_EQ(bounds_info.at(b->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 38}, {0, 155}});
+ ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 38}, {0, 155}});
- ASSERT_EQ(bounds_info.at(c->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(c->buf())[0], {{0, 19}, {0, 31}});
+ ASSERT_EQ(bounds_info.at(c.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(c.buf())[0], {{0, 19}, {0, 31}});
}
{
// Infer bounds on the inner loop scope
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{-1, -1}, {100, 131}});
- ASSERT_EQ(bounds_info.at(b->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(b->buf())[0], {{-1, -1}, {0, 155}});
+ ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(b.buf())[0], {{-1, -1}, {0, 155}});
- ASSERT_EQ(bounds_info.at(c->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(c->buf())[0], {{-1, -1}, {0, 31}});
+ ASSERT_EQ(bounds_info.at(c.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(c.buf())[0], {{-1, -1}, {0, 31}});
}
{
// Infer bounds on the inner loop body's scope
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{-1, -1}, {-1, -1}});
- ASSERT_EQ(bounds_info.at(b->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kLoad);
- verifyConstBounds(bounds_info.at(b->buf())[0], {{-1, -1}, {-1, -1}});
+ ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad);
+ verifyConstBounds(bounds_info.at(b.buf())[0], {{-1, -1}, {-1, -1}});
- ASSERT_EQ(bounds_info.at(c->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(c->buf())[0], {{-1, -1}, {-1, -1}});
+ ASSERT_EQ(bounds_info.at(c.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(c.buf())[0], {{-1, -1}, {-1, -1}});
}
}
KernelScope kernel_scope;
ExprHandle H(6);
Placeholder a(BufHandle("a", {20}, kFloat));
- Tensor* b =
+ Tensor b =
Compute("b", {{H, "x"}}, [&](const VarHandle& x) { return a.load(x); });
- Tensor* c = Compute(
+ Tensor c = Compute(
"c", {{H, "x"}}, [&](const VarHandle& x) { return a.load(x + H); });
LoopNest l({b, c});
std::vector<ForPtr> loops = NodeFinder<For>::find(l.root_stmt());
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{0, 5}});
- ASSERT_EQ(bounds_info.at(b->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 5}});
+ ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 5}});
}
{
// Infer bounds on the inner loop scope
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{6, 11}});
- ASSERT_EQ(bounds_info.at(c->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(c->buf())[0], {{0, 5}});
+ ASSERT_EQ(bounds_info.at(c.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(c.buf())[0], {{0, 5}});
}
{
// Infer bounds on the high-level program.
ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad);
verifyConstBounds(bounds_info.at(a.data())[0], {{0, 11}});
- ASSERT_EQ(bounds_info.at(b->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 5}});
+ ASSERT_EQ(bounds_info.at(b.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 5}});
- ASSERT_EQ(bounds_info.at(c->buf()).size(), 1);
- ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore);
- verifyConstBounds(bounds_info.at(c->buf())[0], {{0, 5}});
+ ASSERT_EQ(bounds_info.at(c.buf()).size(), 1);
+ ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore);
+ verifyConstBounds(bounds_info.at(c.buf())[0], {{0, 5}});
}
}
TEST(BoundsInference, MultipleTopLoopLoad) {
KernelScope kernel_scope;
Placeholder a(BufHandle("a", {100}, kFloat));
- Tensor* b =
+ Tensor b =
Compute("b", {{64, "x"}}, [&](const VarHandle& x) { return a.load(x); });
- Tensor* c = Compute(
+ Tensor c = Compute(
"c", {{32, "x"}}, [&](const VarHandle& x) { return a.load(x + 10); });
- Tensor* d = Compute(
+ Tensor d = Compute(
"d", {{96, "x"}}, [&](const VarHandle& x) { return a.load(x + 2); });
LoopNest l({b, c, d});
// b, c, d are only written, never read.
{
- auto bounds = bounds_info[b->buf()];
+ auto bounds = bounds_info[b.buf()];
ASSERT_EQ(bounds.size(), 1);
auto bound = bounds[0];
ASSERT_EQ(bound.kind, TensorAccessKind::kStore);
verifyConstBounds(bound, {{0, 63}});
}
{
- auto bounds = bounds_info[c->buf()];
+ auto bounds = bounds_info[c.buf()];
ASSERT_EQ(bounds.size(), 1);
auto bound = bounds[0];
ASSERT_EQ(bound.kind, TensorAccessKind::kStore);
verifyConstBounds(bound, {{0, 31}});
}
{
- auto bounds = bounds_info[d->buf()];
+ auto bounds = bounds_info[d.buf()];
ASSERT_EQ(bounds.size(), 1);
auto bound = bounds[0];
ASSERT_EQ(bound.kind, TensorAccessKind::kStore);
TEST(BoundsInference, CacheReads) {
KernelScope kernel_scope;
- Tensor* A = Compute(
+ Tensor A = Compute(
"A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) {
return i * j;
});
- Tensor* B = Compute(
+ Tensor B = Compute(
"B", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i + 30, j + 3);
+ return A.load(i + 30, j + 3);
});
- Tensor* C = Compute(
+ Tensor C = Compute(
"C", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i + 10, j + 20) + A->load(i + 30, j + 40);
+ return A.load(i + 10, j + 20) + A.load(i + 30, j + 40);
});
LoopNest l({B, C});
auto bounds_info_before = inferBounds(l.root_stmt());
StmtPtr j_loop = l.getLoopStmtsFor(B)[1];
- LoopNest::cacheAccesses(A->buf(), "A_local", j_loop);
+ LoopNest::cacheAccesses(A.buf(), "A_local", j_loop);
auto bounds_info_after = inferBounds(l.root_stmt());
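// (Editorial note.) cacheAccesses rewrites the loads of A inside j_loop to
// go through a fresh scratch buffer "A_local", populated from A just before
// j_loop runs; the two inferBounds calls let the test compare the access
// bounds before and after that rewrite.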
TEST(BoundsInference, Flattened) {
KernelScope kernel_scope;
- Tensor* b = Compute(
+ Tensor b = Compute(
"b",
{{3, "z"}, {4, "y"}, {5, "x"}},
[&](const VarHandle& z, const VarHandle& y, const VarHandle& x) {
// There's only one buffer.
ASSERT_EQ(bounds_info.size(), 1);
- auto& TABI = bounds_info[b->buf()][0];
+ auto& TABI = bounds_info[b.buf()][0];
ASSERT_EQ(TABI.kind, TensorAccessKind::kStore);
// Flattened bounds should have a single dimension.
ASSERT_EQ(TABI.start.size(), 1);
TEST(BoundsInference, GetPotentialHazardsLoopNoHazard) {
KernelScope kernel_scope;
- Tensor* A = Compute(
+ Tensor A = Compute(
"A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) {
return i * j;
});
- Tensor* B = Compute(
+ Tensor B = Compute(
"B", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) {
return (i + 1) * (j + 1);
});
TEST(BoundsInference, GetPotentialHazardsLoopCall) {
KernelScope kernel_scope;
- Tensor* A = Compute(
+ Tensor A = Compute(
"A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) {
return i * j;
});
- Tensor* B = Compute(
+ Tensor B = Compute(
"B", {{64, "i"}, {64, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i, j) + 5;
+ return A.load(i, j) + 5;
});
LoopNest l({A, B});
TEST(BoundsInference, GetPotentialHazardsLoopSplit) {
KernelScope kernel_scope;
- Tensor* A = Compute(
+ Tensor A = Compute(
"A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) {
return i * j;
});
te::Placeholder input("input", te::kFloat, {N, C, H, W});
te::Placeholder weight("weight", te::kFloat, {K, CperG, R, S});
te::Placeholder bias("bias", te::kFloat, {K});
- te::Tensor* output = te::conv2d_depthwise(
+ te::Tensor output = te::conv2d_depthwise(
input.handle(), weight.handle(), bias.handle(), kStride, kPad, kGroups);
te::LoopNest loop({output});
te::Placeholder input("input", te::kFloat, {N, C, H, W});
te::Placeholder weight("weight", te::kFloat, {K, CperG, R, S});
- te::Tensor* output = te::conv2d_depthwise(
+ te::Tensor output = te::conv2d_depthwise(
input.handle(), weight.handle(), kStride, kPad, kGroups);
te::LoopNest loop({output});
te::Placeholder input("input", te::kFloat, {N_var, C_var, H_var, W_var});
te::Placeholder weight(
"weight", te::kFloat, {K_var, CperG_var, R_var, S_var});
- te::Tensor* output = te::conv2d_depthwise(
+ te::Tensor output = te::conv2d_depthwise(
input.handle(),
weight.handle(),
N_var,
te::Placeholder inputB(te::BufHandle("input", {N, C, H, W}, te::kFloat));
te::Placeholder filterB(te::BufHandle("filter", {K, C, R, S}, te::kFloat));
- te::Tensor* conv = te::Reduce(
+ te::Tensor conv = te::Reduce(
"conv",
{{N, "n"}, {K, "k"}, {OH, "oh"}, {OW, "ow"}},
te::Sum(),
Dtype dtype = ToDtype<ctype>();
Placeholder a_buf("a", dtype, {num_iter, block_count, block_size});
Placeholder b_buf("b", dtype, {num_iter, block_count, block_size});
- Tensor* c = Compute(
+ Tensor c = Compute(
"c",
{
{num_iter, "n"},
const int block_size = 128;
Dtype dtype = ToDtype<float>();
Placeholder a_buf("a", dtype, {num_iter, block_count, block_size});
- Tensor* c = Compute(
+ Tensor c = Compute(
"c",
{
{num_iter, "n"},
KernelScope kernel_scope;
Placeholder a_buf("a", kFloat, {N});
Placeholder b_buf("b", kFloat, {N});
- Tensor* c = Compute(
+ Tensor c = Compute(
"c",
{
{N, "N"},
KernelScope ks;
auto half = ToDtype<at::Half>();
Placeholder a("a", half, {4});
- Tensor* b = Compute("b", {{4, "n"}}, [&](const VarHandle& i) {
+ Tensor b = Compute("b", {{4, "n"}}, [&](const VarHandle& i) {
return Cast::make(kFloat, a.load(i));
});
VarHandle n("n", kInt);
Placeholder a(BufHandle("a", {m, n}, kFloat));
Placeholder b(BufHandle("b", {m, n}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"c", {{m, "m"}, {n, "n"}}, [&](const VarHandle& i, const VarHandle& j) {
return a.load(i, j) + b.load(i, j);
});
const int num_iter = 3;
const int block_count = 16;
const int block_size = 128;
- Tensor* c = Compute(
+ Tensor c = Compute(
"c",
{
{num_iter, "n"},
constexpr int N = 4096;
VarHandle n("n", kInt);
Placeholder a(BufHandle("a", {n}, kFloat));
- Tensor* b = Compute(
+ Tensor b = Compute(
"b", {{n, "n"}}, [&](const VarHandle& i) { return a.load(i) * 2.0f; });
LoopNest l({b});
ForPtr inner;
KernelScope ks;
auto half = ToDtype<at::Half>();
Placeholder a("a", half, {4});
- Tensor* b = Compute("b", {{4, "n"}}, [&](const VarHandle& i) {
+ Tensor b = Compute("b", {{4, "n"}}, [&](const VarHandle& i) {
return Cast::make(half, ExprHandle(2.0f) * a.load(i));
});
- Tensor* c = Compute("c", {{4, "n"}}, [&](const VarHandle& i) {
- return Cast::make(kFloat, Cast::make(half, ExprHandle(42)) + b->load(i));
+ Tensor c = Compute("c", {{4, "n"}}, [&](const VarHandle& i) {
+ return Cast::make(kFloat, Cast::make(half, ExprHandle(42)) + b.load(i));
});
- Tensor* d = Compute("d", {{4, "n"}}, [&](const VarHandle& i) {
- return Cast::make(half, c->load(i));
+ Tensor d = Compute("d", {{4, "n"}}, [&](const VarHandle& i) {
+ return Cast::make(half, c.load(i));
});
LoopNest l({b, c, d});
KernelScope kernel_scope;
auto half = ToDtype<at::Half>();
Placeholder a("a", half, {4});
- Tensor* relu = Compute("relu", {{4, "n"}}, [&](const VarHandle& i) {
+ Tensor relu = Compute("relu", {{4, "n"}}, [&](const VarHandle& i) {
return Max::make(a.load(i), ExprHandle(alloc<HalfImm>(0)), true);
});
Placeholder a("a", kFloat, {4});
auto half = ToDtype<at::Half>();
Placeholder b("b", half, {4});
- Tensor* relu = Compute("relu", {{4, "n"}}, [&](const VarHandle& i) {
+ Tensor relu = Compute("relu", {{4, "n"}}, [&](const VarHandle& i) {
return Max::make(a.load(i), ExprHandle(alloc<FloatImm>(0)), true);
});
int B_SIZE = 50;
Placeholder a_buf("a", kFloat, {A_SIZE});
Placeholder b_buf("b", kFloat, {B_SIZE});
- Tensor* c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) {
+ Tensor c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) {
return a_buf.load(i) + 10;
});
- Tensor* d = Compute("d", {{B_SIZE, "i"}}, [&](const VarHandle& i) {
+ Tensor d = Compute("d", {{B_SIZE, "i"}}, [&](const VarHandle& i) {
return a_buf.load(i) + b_buf.load(i);
});
int B_SIZE = 100;
Placeholder a_buf("a", kFloat, {A_SIZE});
Placeholder b_buf("b", kFloat, {B_SIZE});
- Tensor* c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) {
+ Tensor c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) {
return a_buf.load(i) + 10;
});
- Tensor* d = Compute("d", {{B_SIZE, "i"}}, [&](const VarHandle& i) {
+ Tensor d = Compute("d", {{B_SIZE, "i"}}, [&](const VarHandle& i) {
return a_buf.load(i / 2) + b_buf.load(i);
});
int B_SIZE = 50;
Placeholder a_buf("a", kFloat, {OUTER_SIZE, A_SIZE});
Placeholder b_buf("b", kFloat, {OUTER_SIZE, B_SIZE});
- Tensor* c = Compute(
+ Tensor c = Compute(
"C",
{{OUTER_SIZE, "i"}, {A_SIZE, "j"}},
[&](const VarHandle& i, const VarHandle& j) {
return ExprHandle(2) * a_buf.load(i, j);
});
- Tensor* d = Compute(
+ Tensor d = Compute(
"D",
{{OUTER_SIZE, "i"}, {B_SIZE, "j"}},
[&](const VarHandle& i, const VarHandle& j) {
- return c->load(i, j * 2) + b_buf.load(i, j);
+ return c.load(i, j * 2) + b_buf.load(i, j);
});
LoopNest l({c, d});
VarHandle B_SIZE("B_SIZE", kInt);
Placeholder a_buf("a", kFloat, {OUTER_SIZE, A_SIZE});
Placeholder b_buf("b", kFloat, {OUTER_SIZE, B_SIZE});
- Tensor* c = Compute(
+ Tensor c = Compute(
"C",
{{OUTER_SIZE, "i"}, {A_SIZE, "j"}},
[&](const VarHandle& i, const VarHandle& j) {
return ExprHandle(2) * a_buf.load(i, j);
});
- Tensor* d = Compute(
+ Tensor d = Compute(
"D",
{{OUTER_SIZE, "i"}, {B_SIZE, "j"}},
[&](const VarHandle& i, const VarHandle& j) {
- return c->load(i, j * 2) + b_buf.load(i, j);
+ return c.load(i, j * 2) + b_buf.load(i, j);
});
LoopNest l({c, d});
int B_SIZE = 15;
Placeholder a_buf("a", kFloat, {OUTER_SIZE, A_SIZE});
Placeholder b_buf("b", kFloat, {OUTER_SIZE, B_SIZE});
- Tensor* c = Compute(
+ Tensor c = Compute(
"C",
{{OUTER_SIZE, "i"}, {A_SIZE, "j"}},
[&](const VarHandle& i, const VarHandle& j) {
return ExprHandle(2) * a_buf.load(i, j);
});
- Tensor* d = Compute(
+ Tensor d = Compute(
"D",
{{OUTER_SIZE, "i"}, {B_SIZE, "j"}},
[&](const VarHandle& i, const VarHandle& j) {
- return c->load(i, j * 2) + b_buf.load(i, j);
+ return c.load(i, j * 2) + b_buf.load(i, j);
});
LoopNest l({c, d});
int B_SIZE = 15;
Placeholder a_buf("a", kFloat, {OUTER_A_SIZE, A_SIZE});
Placeholder b_buf("b", kFloat, {OUTER_B_SIZE, B_SIZE});
- Tensor* c = Compute(
+ Tensor c = Compute(
"C",
{{OUTER_A_SIZE, "i"}, {A_SIZE, "j"}},
[&](const VarHandle& i, const VarHandle& j) {
return ExprHandle(2) * a_buf.load(i, j);
});
- Tensor* d = Compute(
+ Tensor d = Compute(
"D",
{{OUTER_B_SIZE, "i"}, {B_SIZE, "j"}},
[&](const VarHandle& i, const VarHandle& j) {
- return c->load(i, j * 2) + b_buf.load(i, j);
+ return c.load(i, j * 2) + b_buf.load(i, j);
});
LoopNest l({c, d});
int64_t dilation = 1;
int64_t groups = 1;
- Tensor* Result = new Tensor(
+ Tensor Result = Tensor(
ResultBuf.node(),
ExternalCall::make(
ResultBuf,
Placeholder Weight("Weight", kFloat, {16, 16, 1, 1});
BufHandle ResultBuf("Result", {1, 16, 112, 112}, kFloat);
- Tensor* Result = new Tensor(
+ Tensor Result = Tensor(
ResultBuf.node(),
ExternalCall::make(
ResultBuf,
int64_t beta = 2;
int64_t alpha = 2;
- Tensor* Result = new Tensor(
+ Tensor Result = Tensor(
ResultBuf.node(),
ExternalCall::make(
ResultBuf,
weight, bias, c10::optional<at::Scalar>(), c10::optional<at::Scalar>());
Placeholder DummyPrepacked("DummyPrepacked", kFloat, {1});
- Tensor* Result = new Tensor(
+ Tensor Result = Tensor(
ResultBuf.node(),
ExternalCall::make(
ResultBuf,
c10::optional<at::Scalar>());
Placeholder DummyPrepacked("DummyPrepacked", kFloat, {1});
- Tensor* Result = new Tensor(
+ Tensor Result = Tensor(
ResultBuf.node(),
ExternalCall::make(
ResultBuf,
Placeholder B("", kFloat, toExprHandleVec(bShape));
BufHandle ResultBuf("Result", toExprHandleVec(resShape), kFloat);
- Tensor* Result = new Tensor(
+ Tensor Result = Tensor(
ResultBuf.node(),
ExternalCall::make(
ResultBuf,
Placeholder A("A", kFloat, toExprHandleVec(aShape));
BufHandle ResultBuf("Result", toExprHandleVec(resShape), kFloat);
- Tensor* Result = new Tensor(
+ Tensor Result = Tensor(
ResultBuf.node(),
ExternalCall::make(
ResultBuf, externCallName, {BufHandle(A.data())}, externCallArgs));
BufHandle ConvResultBuf("ConvResult", {1, 16, 32, 32}, kFloat);
BufHandle MatmulResultBuf("MatmulResult", {1, 16, 32, 32}, kFloat);
- Tensor* Input = Compute(
+ Tensor Input = Compute(
"Input",
{{1, "n"}, {16, "c"}, {32, "h"}, {32, "w"}},
[&](const VarHandle& n,
const VarHandle& c,
const VarHandle& h,
const VarHandle& w) { return FloatImm::make(5.0f); });
- Tensor* Weight = Compute(
+ Tensor Weight = Compute(
"Weight",
{{16, "n"}, {16, "c"}, {1, "kh"}, {1, "kw"}},
[&](const VarHandle& n,
const VarHandle& h,
const VarHandle& w) { return FloatImm::make(6.0f); });
- Tensor* ConvResult = new Tensor(
+ Tensor ConvResult = Tensor(
ConvResultBuf.node(),
ExternalCall::make(
ConvResultBuf,
"nnc_aten_conv2d",
- {BufHandle(Input->buf()), BufHandle(Weight->buf())},
+ {BufHandle(Input.buf()), BufHandle(Weight.buf())},
{}));
- Tensor* MatmulResult = new Tensor(
+ Tensor MatmulResult = Tensor(
MatmulResultBuf.node(),
ExternalCall::make(
MatmulResultBuf,
"nnc_aten_matmul",
- {BufHandle(ConvResult->buf()), BufHandle(ConvResult->buf())},
+ {BufHandle(ConvResult.buf()), BufHandle(ConvResult.buf())},
{}));
- Tensor* Result = Compute(
+ Tensor Result = Compute(
"Result",
{{1, "n"}, {16, "c"}, {32, "h"}, {32, "w"}},
[&](const VarHandle& n,
const VarHandle& c,
const VarHandle& h,
const VarHandle& w) {
- return ConvResult->load(n, c, h, w) + MatmulResult->load(n, c, h, w);
+ return ConvResult.load(n, c, h, w) + MatmulResult.load(n, c, h, w);
});
LoopNest l({Input, Weight, ConvResult, MatmulResult, Result});
BufHandle MatmulResultBuf("MatmulResult", {8, 8}, kFloat);
- Tensor* A = Compute(
+ Tensor A = Compute(
"A", {{8, "i"}, {8, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
return FloatImm::make(5.0f);
});
- Tensor* B = Compute(
+ Tensor B = Compute(
"B", {{8, "i"}, {8, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
return FloatImm::make(4.0f);
});
- Tensor* MatmulResult = new Tensor(
+ Tensor MatmulResult = Tensor(
MatmulResultBuf.node(),
ExternalCall::make(
MatmulResultBuf,
"nnc_aten_matmul",
- {BufHandle(A->buf()), BufHandle(B->buf())},
+ {BufHandle(A.buf()), BufHandle(B.buf())},
{}));
- Tensor* Result = Compute(
+ Tensor Result = Compute(
"Result",
{{8, "i"}, {8, "j"}},
[&](const VarHandle& i, const VarHandle& j) {
- return MatmulResult->load(i, j) + FloatImm::make(3.0f);
+ return MatmulResult.load(i, j) + FloatImm::make(3.0f);
});
StmtPtr root_stmt = alloc<Block>(std::vector<StmtPtr>(
- {A->stmt(), B->stmt(), MatmulResult->stmt(), Result->stmt()}));
- LoopNest l(root_stmt, {Result->buf()});
+ {A.stmt(), B.stmt(), MatmulResult.stmt(), Result.stmt()}));
+ LoopNest l(root_stmt, {Result.buf()});
// Inlining should not inline anything here since all Bufs are either
// defined or used in ExternalCalls
int M = 4;
int N = 20;
- Tensor* producer = Compute(
+ Tensor producer = Compute(
"producer",
{{M, "m"}, {N, "n"}},
[&](const ExprHandle& m, const ExprHandle& n) { return m * n; });
- Tensor* chunk_0 = Compute(
+ Tensor chunk_0 = Compute(
"chunk",
{{M, "m"}, {N / 2, "n"}},
[&](const ExprHandle& m, const ExprHandle& n) {
- return producer->load(m, n);
+ return producer.load(m, n);
});
- Tensor* chunk_1 = Compute(
+ Tensor chunk_1 = Compute(
"chunk",
{{M, "m"}, {N / 2, "n"}},
[&](const ExprHandle& m, const ExprHandle& n) {
- return producer->load(m, n + ExprHandle(N / 2));
+ return producer.load(m, n + ExprHandle(N / 2));
});
- Tensor* consumer = Compute(
+ Tensor consumer = Compute(
"consumer",
{{M, "i"}, {N / 2, "j"}},
[&](const ExprHandle& i, const ExprHandle& j) {
- return i * chunk_1->load(i, j);
+ return i * chunk_1.load(i, j);
});
LoopNest l({chunk_0, chunk_1, consumer});
#endif
}
-Tensor* lowerNanToNum(
+Tensor lowerNanToNum(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
const c10::optional<ScalarType>& outputType,
KernelScope kernel_scope;
Placeholder a(BufHandle("A", {1}, kInt));
- Tensor* c =
+ Tensor c =
Compute("c", {{4, "i"}}, [&](const VarHandle& i) { return a.load(i); });
- Placeholder c_buf(BufHandle(c->buf()));
+ Placeholder c_buf(BufHandle(c.buf()));
LoopNest l({c});
StmtPtr s = l.root_stmt();
ASSERT_TRUE(LoopNest::vectorize(to<For>(to<Block>(s)->front())));
KernelScope kernel_scope;
Placeholder a(BufHandle("A", {128}, kInt));
- Tensor* c = Compute("c", {{128, "i"}}, [&](const VarHandle& i) {
+ Tensor c = Compute("c", {{128, "i"}}, [&](const VarHandle& i) {
return bitcast<float>(a.load(i));
});
- Placeholder c_buf(BufHandle(c->buf()));
+ Placeholder c_buf(BufHandle(c.buf()));
LoopNest l({c});
StmtPtr s = l.root_stmt();
ASSERT_TRUE(LoopNest::vectorize(to<For>(to<Block>(s)->front())));
TEST(LLVM, SimpleMath01) {
KernelScope kernel_scope;
const int N = 1024;
- Tensor* tensor = Compute("f", {{N, "i"}}, [](const VarHandle& i) {
+ Tensor tensor = Compute("f", {{N, "i"}}, [](const VarHandle& i) {
return cast<float>(i * i + 1);
});
LoopNest l({tensor});
StmtPtr stmt = l.root_stmt();
- Placeholder f_buf(BufHandle(tensor->buf()));
+ Placeholder f_buf(BufHandle(tensor.buf()));
LLVMCodeGen cg(stmt, {f_buf});
PaddedBuffer<float> f_v(N, "f_v");
const int N = 1024;
Placeholder a(BufHandle("a", {N}, kFloat));
Placeholder b(BufHandle("b", {N}, kFloat));
- Tensor* c = Compute("c", {{N, "i"}}, [&](const VarHandle& i) {
+ Tensor c = Compute("c", {{N, "i"}}, [&](const VarHandle& i) {
return a.load(i) * b.load(i);
});
- Placeholder c_buf(BufHandle(c->buf()));
+ Placeholder c_buf(BufHandle(c.buf()));
LoopNest l({c});
StmtPtr s = l.root_stmt();
const int N = 1024;
Placeholder a(BufHandle("a", {M, N}, kFloat));
Placeholder b(BufHandle("b", {N}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"c", {{M, "i"}, {N, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
return a.load(i, j) + b.load(j);
});
- Placeholder c_buf(BufHandle(c->buf()));
+ Placeholder c_buf(BufHandle(c.buf()));
LoopNest l({c});
l.prepareForCodegen();
StmtPtr s = l.root_stmt();
VarHandle n("n", kInt);
Placeholder a(BufHandle("a", {n}, kFloat));
Placeholder b(BufHandle("b", {n}, kFloat));
- Tensor* c = Compute("c", {{n, "n"}}, [&](const VarHandle& i) {
+ Tensor c = Compute("c", {{n, "n"}}, [&](const VarHandle& i) {
return a.load(i) + b.load(i);
});
LoopNest l({c});
VarHandle n("n", kInt);
Placeholder a(BufHandle("a", {m, n}, kFloat));
Placeholder b(BufHandle("b", {m, n}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"c", {{m, "m"}, {n, "n"}}, [&](const VarHandle& i, const VarHandle& j) {
return a.load(i, j) + b.load(i, j);
});
KernelScope kernel_scope;
Placeholder a(BufHandle("a", {1}, kFloat));
- Tensor* c = Compute("c", {{0, "m"}}, [&](const VarHandle& m) { return m; });
+ Tensor c = Compute("c", {{0, "m"}}, [&](const VarHandle& m) { return m; });
LoopNest l({c});
l.prepareForCodegen();
// TODO: why doesn't implicit vector<DimArg> work?
std::vector<DimArg> axis = {DimArg(1)};
std::vector<DimArg> reduce_axis = {DimArg(M), DimArg(N)};
- Tensor* b = Reduce("sum", axis, Sum(), a, reduce_axis);
+ Tensor b = Reduce("sum", axis, Sum(), a, reduce_axis);
LoopNest loop({b});
loop.prepareForCodegen();
// TODO: why doesn't implicit vector<DimArg> work?
std::vector<DimArg> axis = {DimArg(1)};
std::vector<DimArg> reduce_axis = {DimArg(M), DimArg(N)};
- Tensor* b = Reduce("sum", axis, Sum(), a, reduce_axis);
+ Tensor b = Reduce("sum", axis, Sum(), a, reduce_axis);
LoopNest loop({b});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(b);
loops = loop.getLoopStmtsFor(b);
loop_m = loops.at(2);
loop_n = loops.at(1);
- auto b_body = loop.getAllWritesToBuf(b->buf())[1];
+ auto b_body = loop.getAllWritesToBuf(b.buf())[1];
ASSERT_TRUE(loop.rfactor(b_body, loop_n));
loop.prepareForCodegen();
Placeholder a("a", kFloat, {1, M, N});
- Tensor* b = Reduce("sum", {{1, "K"}}, Sum(), a, {{M, "M"}, {N, "N"}});
+ Tensor b = Reduce("sum", {{1, "K"}}, Sum(), a, {{M, "M"}, {N, "N"}});
LoopNest loopnest({b});
std::vector<ForPtr> loops = loopnest.getLoopStmtsFor(b);
// Reorder n and m loops
loopnest.reorderAxis(loops.at(1), loops.at(2));
- auto b_body = loopnest.getAllWritesToBuf(b->buf()).at(1);
- auto all_loops = loopnest.getAllLoopNestsWritingToBuf(b->buf());
+ auto b_body = loopnest.getAllWritesToBuf(b.buf()).at(1);
+ auto all_loops = loopnest.getAllLoopNestsWritingToBuf(b.buf());
ASSERT_TRUE(all_loops.size() == 2 && all_loops[1].size() == 3);
ASSERT_TRUE(loopnest.rfactor(b_body, all_loops[1][1]));
auto distributed_loops = loopnest.distributeLoop(all_loops[1][1]);
KernelScope kernel_scope;
const int M = 4;
const int N = 6;
- Tensor* f = Compute(
+ Tensor f = Compute(
"f", {{M, "m"}, {N, "n"}}, [](const VarHandle& m, const VarHandle& n) {
return cast<float>(m + n);
});
KernelScope kernel_scope;
int M = 5;
int N = 7;
- Tensor* t1 =
+ Tensor t1 =
Compute("t1", {{M, "M"}}, [](const VarHandle& m) { return m + 1.f; });
- Tensor* t2 =
+ Tensor t2 =
Compute("t2", {{N, "N"}}, [](const VarHandle& n) { return n + 2.f; });
- Tensor* t3 = Compute(
+ Tensor t3 = Compute(
"t3",
{{M, "M"}, {N, "N"}},
[=](const VarHandle& m, const VarHandle& n) {
- return t1->load(m) * t2->load(n);
+ return t1.load(m) * t2.load(n);
});
- Tensor* t4 = Compute(
+ Tensor t4 = Compute(
"t4",
{{M, "M"}, {N, "N"}},
[=](const VarHandle& m, const VarHandle& n) {
- return t3->load(m, n) + m + n;
+ return t3.load(m, n) + m + n;
});
- LoopNest loop_nest(std::vector<Tensor*>({t4}), {t1, t2, t3, t4});
+ LoopNest loop_nest({t4}, {t1, t2, t3, t4});
std::vector<ForPtr> loop_list;
{
auto const& loops = loop_nest.getLoopStmtsFor(t1);
Placeholder AP(BufHandle("A", {M, K}, kFloat));
Placeholder BP(BufHandle("B", {K, N}, kFloat));
- Tensor* CT = Reduce(
+ Tensor CT = Reduce(
"gemm",
{{M, "M"}, {N, "N"}},
Sum(),
VarHandle N("N", kInt);
Placeholder a(BufHandle("a", {M, N}, kFloat));
Placeholder b(BufHandle("b", {N}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"c", {{M, "i"}, {N, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
return a.load(i, j) + b.load(j);
});
std::vector<float> cv(M * N_value, 0);
std::vector<void*> args({av.data(), bv.data(), cv.data(), &N_value});
- LLVMCodeGen cg(s, {a, b, BufHandle(c->buf()), N});
+ LLVMCodeGen cg(s, {a, b, BufHandle(c.buf()), N});
cg.call_raw(args);
for (int i = 0; i < M; i++) {
}
}
- SimpleIREvaluator eval(s, {a, b, BufHandle(c->buf()), N});
+ SimpleIREvaluator eval(s, {a, b, BufHandle(c.buf()), N});
eval.call_raw(args);
for (int i = 0; i < M; i++) {
Placeholder a("a", kFloat, {M});
Placeholder b("b", kFloat, {M});
Placeholder c("c", kFloat, {M});
- Tensor* d = Compute("d", {{M, "m"}}, [&](const VarHandle& m) {
+ Tensor d = Compute("d", {{M, "m"}}, [&](const VarHandle& m) {
return a.load(m) * b.load(m) + c.load(m);
});
LoopNest nest({d});
TEST(LoopNest, ExprSimple01) {
KernelScope kernel_scope;
- Tensor* tensor = Compute(
+ Tensor tensor = Compute(
"f", {{16, "X"}, {5, "y"}}, [](const VarHandle& x, const VarHandle& y) {
return ExprHandle(1.0f) + cast<float>(x) * x + cast<float>(y) * y;
});
LoopNest l({tensor});
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::splitWithTail(loops[0], 2);
LoopNest::splitWithTail(loops[0], 2);
TEST(LoopNest, ExprLower01) {
KernelScope kernel_scope;
- Tensor* tensor = Compute(
+ Tensor tensor = Compute(
"f", {{16, "x"}, {5, "y"}}, [](const VarHandle& x, const VarHandle& y) {
return ExprHandle(1.0f) + cast<float>(x) * x + cast<float>(y) * y;
});
auto func = [](const ExprHandle& x, const ExprHandle& y) {
return ExprHandle(1.0f) + cast<float>(x) * x + cast<float>(y) * y;
};
- Tensor* tensor = Compute("f", {{26, "x"}, {5, "y"}}, func);
+ Tensor tensor = Compute("f", {{26, "x"}, {5, "y"}}, func);
LoopNest l({tensor});
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::splitWithTail(loops[0], 4);
auto func = [](const ExprHandle& x) {
return ExprHandle(1.0f) + cast<float>(x);
};
- Tensor* tensor = Compute("f", {{10, "x"}}, func);
+ Tensor tensor = Compute("f", {{10, "x"}}, func);
LoopNest l({tensor});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr head;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr tail;
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
loops[0]->set_gpu_block_index(LoopOptions::IDX_Y);
LoopNest::sliceHead(loops[0], 2, &head, &tail);
auto func = [](const ExprHandle& x) {
return ExprHandle(1.0f) + cast<float>(x);
};
- Tensor* tensor = Compute("f", {{10, "x"}}, func);
+ Tensor tensor = Compute("f", {{10, "x"}}, func);
LoopNest l({tensor});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr head;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr tail;
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::sliceTail(loops[0], 4, &head, &tail);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
auto func = [](const ExprHandle& x) {
return ExprHandle(1.0f) + cast<float>(x);
};
- Tensor* tensor = Compute("f", {{10, "x"}}, func);
+ Tensor tensor = Compute("f", {{10, "x"}}, func);
LoopNest l({tensor});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr head;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr tail;
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::sliceHead(loops[0], 10, &head, &tail);
ASSERT_EQ(head, loops[0]);
auto func = [](const ExprHandle& x) {
return ExprHandle(1.0f) + cast<float>(x);
};
- Tensor* tensor = Compute("f", {{10, "x"}}, func);
+ Tensor tensor = Compute("f", {{10, "x"}}, func);
LoopNest l({tensor});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr head;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr tail;
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::sliceHead(loops[0], 100, &head, &tail);
ASSERT_EQ(head, loops[0]);
auto func = [](const ExprHandle& x) {
return ExprHandle(1.0f) + cast<float>(x);
};
- Tensor* tensor = Compute("f", {{10, "x"}}, func);
+ Tensor tensor = Compute("f", {{10, "x"}}, func);
LoopNest l({tensor});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr head;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr tail;
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::sliceHead(loops[0], 4, &head, &tail);
ASSERT_NE(head, nullptr);
auto func = [](const ExprHandle& x) {
return ExprHandle(1.0f) + cast<float>(x);
};
- Tensor* tensor = Compute("f", {{10, "x"}}, func);
+ Tensor tensor = Compute("f", {{10, "x"}}, func);
LoopNest l({tensor});
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr head;
auto func = [](const ExprHandle& x) {
return ExprHandle(1.0f) + cast<float>(x);
};
- Tensor* tensor = Compute("f", {{10, "x"}}, func);
+ Tensor tensor = Compute("f", {{10, "x"}}, func);
LoopNest l({tensor});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr head;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr tail;
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::sliceTail(loops[0], 10, &head, &tail);
ASSERT_EQ(head, nullptr);
auto func = [](const ExprHandle& x) {
return ExprHandle(1.0f) + cast<float>(x);
};
- Tensor* tensor = Compute("f", {{10, "x"}}, func);
+ Tensor tensor = Compute("f", {{10, "x"}}, func);
LoopNest l({tensor});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr head;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr tail;
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::sliceTail(loops[0], 100, &head, &tail);
ASSERT_EQ(head, nullptr);
auto func = [](const ExprHandle& x) {
return ExprHandle(1.0f) + cast<float>(x);
};
- Tensor* tensor = Compute("f", {{10, "x"}}, func);
+ Tensor tensor = Compute("f", {{10, "x"}}, func);
LoopNest l({tensor});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr head;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr tail;
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::sliceTail(loops[0], 4, &head, &tail);
ASSERT_NE(head, nullptr);
auto func = [](const ExprHandle& x) {
return ExprHandle(1.0f) + cast<float>(x);
};
- Tensor* tensor = Compute("f", {{100, "x"}}, func);
+ Tensor tensor = Compute("f", {{100, "x"}}, func);
LoopNest l({tensor});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr inner;
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr tail;
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
// outer: [0, 4)
// inner: [0, 21)
// tail: [84, 100)
const std::vector<std::pair<int, int>>& expected_for_ranges) {
KernelScope kernel_scope;
VarHandle dim("dim", kInt);
- Tensor* tensor =
+ Tensor tensor =
Compute("f", {{dim, "x"}}, [](const ExprHandle& x) { return x; });
LoopNest l({tensor});
std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr head;
auto func = [](const ExprHandle& x) {
return ExprHandle(1.0f) + cast<float>(x);
};
- Tensor* tensor = Compute("f", {{199, "x"}}, func);
+ Tensor tensor = Compute("f", {{199, "x"}}, func);
LoopNest l({tensor});
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
LoopNest::splitWithTail(loops[0], 17);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
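// (Editorial note.) With extent 199 and factor 17, 199 = 11*17 + 12, so the
// split produces an outer loop of 11 iterations, an inner loop of 17, and a
// tail loop covering the remaining 12 elements.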
auto func = [](const ExprHandle& x, const ExprHandle& y) {
return ExprHandle(1.0f) + cast<float>(x) * x + cast<float>(y) * y;
};
- Tensor* tensor = Compute("f", {{24, "x"}, {5, "y"}}, func);
+ Tensor tensor = Compute("f", {{24, "x"}, {5, "y"}}, func);
LoopNest l({tensor});
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::splitWithTail(loops[0], 4);
StmtPtr stmt = l.root_stmt();
const int N = 5;
Placeholder a_buf("a", kFloat, {M, N});
Placeholder b_buf("b", kFloat, {M, N});
- Tensor* tensor = Compute(
+ Tensor tensor = Compute(
"f", {{M, "m"}, {N, "n"}}, [&](const ExprHandle& m, const ExprHandle& n) {
return a_buf.load(m, n) + b_buf.load(m, n) + 1.0f;
});
LoopNest l({tensor});
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::splitWithMask(loops[1], 4);
StmtPtr stmt = l.root_stmt();
const int M = 64;
Placeholder a_buf("a", kFloat, {M});
Placeholder b_buf("b", kFloat, {M});
- Tensor* tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) {
+ Tensor tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) {
return a_buf.load(m) + b_buf.load(m) + 1.0f;
});
LoopNest l({tensor});
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::splitWithMask(loops[0], 4);
LoopNest::splitWithMask(loops[0], 4);
const int M = 64, N = 64;
Placeholder a_buf("a", kFloat, {M, N});
Placeholder b_buf("b", kFloat, {M, N});
- Tensor* tensor = Compute(
+ Tensor tensor = Compute(
"f", {{M, "m"}, {N, "n"}}, [&](const ExprHandle& m, const ExprHandle& n) {
return a_buf.load({m, n}) + b_buf.load({m, n}) + 1.0f;
});
LoopNest l({tensor});
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
l.tile(loops[0], loops[1], 4, 8);
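// (Editorial note.) 64 is divisible by both 4 and 8, so this tiling yields a
// full 16x8 grid of 4x8 tiles with no tail loops in either dimension.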
const int M = 64, N = 64;
Placeholder a_buf("a", kFloat, {M, N});
Placeholder b_buf("b", kFloat, {M, N});
- Tensor* tensor = Compute(
+ Tensor tensor = Compute(
"f", {{M, "m"}, {N, "n"}}, [&](const ExprHandle& m, const ExprHandle& n) {
return a_buf.load({m, n}) + b_buf.load({m, n}) + 1.0f;
});
LoopNest l({tensor});
- std::vector<ForPtr> loops =
- l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
l.tile(loops[0], loops[1], 5, 9);
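// With factors that do not divide 64, tiling should also emit tail loops
// for the leftovers (64 = 12 * 5 + 4 and 64 = 7 * 9 + 1).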
const int M = 8, N = 8, L = 8, K = 8;
Placeholder a_buf("a", kFloat, {M, N, L, K});
Placeholder b_buf("b", kFloat, {M, N, L, K});
- Tensor* tensor = Compute(
+ Tensor tensor = Compute(
"f",
{{M, "m"}, {N, "n"}, {L, "l"}, {K, "k"}},
[&](const ExprHandle& m,
LoopNest nest({tensor});
std::vector<ForPtr> loops =
- nest.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ nest.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
nest.tile(loops[1], loops[2], 3, 3);
const int M = 21;
Placeholder a_buf("a", kFloat, {M});
Placeholder b_buf("b", kFloat, {M});
- Tensor* tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) {
+ Tensor tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) {
return a_buf.load(m) + b_buf.load(m) + 1.0f;
});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
const int M = 21;
Placeholder a_buf("a", kFloat, {M});
Placeholder b_buf("b", kFloat, {M});
- Tensor* tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) {
+ Tensor tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) {
return a_buf.load(m) + b_buf.load(m) + 1.0f;
});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
const int K = 6;
Placeholder a_buf("a", kFloat, {M, N});
Placeholder b_buf("b", kFloat, {N, K});
- Tensor* c = Compute(
+ Tensor c = Compute(
"broadcast_add",
{{M, "m"}, {N, "n"}, {K, "k"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
const int K = 6;
Placeholder a_buf("a", kFloat, {M, N});
Placeholder b_buf("b", kFloat, {N, K});
- Tensor* c = Compute(
+ Tensor c = Compute(
"broadcast_add",
{{M, "m"}, {N, "n"}, {K, "k"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
return a_buf.load(m, n) + b_buf.load(n, k);
});
- Tensor* d = Compute(
+ Tensor d = Compute(
"d",
{{M, "m"}, {N, "n"}, {K, "k"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return c->load(m, n, k) + 1;
+ return c.load(m, n, k) + 1;
});
- LoopNest l(std::vector<Tensor*>({d}), {c, d});
+ LoopNest l({d}, {c, d});
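// The two-argument LoopNest constructor takes the output tensors first and
// then the full set of tensors to compute; c remains an intermediate buffer.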
l.prepareForCodegen();
StmtPtr stmt = l.root_stmt();
std::ostringstream oss;
Placeholder c_buf("c", kFloat, {M, N});
Placeholder d_buf("d", kFloat, {M, K});
- Tensor* x = Compute(
+ Tensor x = Compute(
"x",
{{M, "m1"}, {N, "n1"}, {K, "k1"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
return a_buf.load(m, n) * b_buf.load(n, k);
});
- Tensor* y = Compute(
+ Tensor y = Compute(
"y",
{{M, "m2"}, {N, "n2"}, {K, "k2"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return c_buf.load(m, n) * d_buf.load(m, k) + x->load(m, n, k);
+ return c_buf.load(m, n) * d_buf.load(m, k) + x.load(m, n, k);
});
- LoopNest l1(std::vector<Tensor*>({y}), {x, y});
+ LoopNest l1({y}, {x, y});
LoopNest l2(l1);
- l2.computeInline(x->buf());
+ l2.computeInline(x.buf());
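// computeInline substitutes x's definition into every load of x inside y
// and removes x's loop nest; l1 is kept un-inlined as a baseline for
// comparison.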
l1.prepareForCodegen();
l2.prepareForCodegen();
Placeholder c_buf("c", kFloat, {M, N});
Placeholder d_buf("d", kFloat, {M, K});
- Tensor* x = Compute(
+ Tensor x = Compute(
"x",
{{M, "m1"}, {N, "n1"}, {K, "k1"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
return a_buf.load(m, n) * b_buf.load(n, k);
});
- Tensor* y = Compute(
+ Tensor y = Compute(
"y",
{{M, "m2"}, {N, "n2"}, {K, "k2"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return c_buf.load(m, n) * d_buf.load(m, k) + x->load(m, n, k);
+ return c_buf.load(m, n) * d_buf.load(m, k) + x.load(m, n, k);
});
- Tensor* z = Compute(
+ Tensor z = Compute(
"z",
{{M, "m3"}, {N, "n3"}, {K, "k3"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return x->load(m, n, k) + y->load(m, n, k);
+ return x.load(m, n, k) + y.load(m, n, k);
});
- LoopNest l(std::vector<Tensor*>({z}), {x, y, z});
+ LoopNest l({z}, {x, y, z});
for (const std::string& order : inline_order) {
if (order == "x") {
- l.computeInline(x->buf());
+ l.computeInline(x.buf());
} else if (order == "y") {
- l.computeInline(y->buf());
+ l.computeInline(y.buf());
} else {
throw std::runtime_error("Invalid order: " + order);
}
}
if (inline_order.size() == 2) {
- Tensor* z2 = Compute(
+ Tensor z2 = Compute(
"z",
{{M, "m3"}, {N, "n3"}, {K, "k3"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
const int N = 5;
const int K = 6;
- Tensor* x = Compute(
+ Tensor x = Compute(
"x",
{{M, "m1"}, {N, "n1"}, {K, "k1"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
return Mod::make(Intrinsics::make(kRand, kInt), 5);
});
- Tensor* y = Compute(
+ Tensor y = Compute(
"y",
{{M, "m2"}, {N, "n2"}, {K, "k2"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return x->load(m, n, k) + x->load(m, n, k);
+ return x.load(m, n, k) + x.load(m, n, k);
});
- LoopNest l1(std::vector<Tensor*>({y}), {x, y});
- l1.computeInline(x->buf());
+ LoopNest l1({y}, {x, y});
+ l1.computeInline(x.buf());
// would normally compare results but Rand isn't implemented in the
// SimpleIREvaluator, even if we could seed it.
const int N = 5;
const int K = 6;
- Tensor* x = Compute(
+ Tensor x = Compute(
"x",
{{M, "m1"}, {N, "n1"}, {K, "k1"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
return m * n * k;
});
- Tensor* y = Compute(
+ Tensor y = Compute(
"y",
{{M, "m2"}, {N, "n2"}, {K, "k2"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return x->load(m, n, k) + Intrinsics::make(kRand, kInt) +
+ return x.load(m, n, k) + Intrinsics::make(kRand, kInt) +
Intrinsics::make(kRand, kInt);
});
- LoopNest l1(std::vector<Tensor*>({y}), {x, y});
- l1.computeInline(x->buf());
+ LoopNest l1({y}, {x, y});
+ l1.computeInline(x.buf());
// would normally compare results but Rand isn't implemented in the
// SimpleIREvaluator, even if we could seed it.
const int N = 5;
const int K = 6;
- Tensor* x = Compute("x", {{M, "m1"}}, [&](const VarHandle& m) {
+ Tensor x = Compute("x", {{M, "m1"}}, [&](const VarHandle& m) {
return Mod::make(Intrinsics::make(kRand, kInt), 5);
});
- Tensor* y = Compute(
+ Tensor y = Compute(
"y",
{{M, "m2"}, {N, "n2"}, {K, "k2"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return x->load(m) + x->load(m);
+ return x.load(m) + x.load(m);
});
- LoopNest l1(std::vector<Tensor*>({y}), {x, y});
- l1.computeInline(x->buf());
+ LoopNest l1({y}, {x, y});
+ l1.computeInline(x.buf());
// would normally compare results but Rand isn't implemented in the
// SimpleIREvaluator, even if we could seed it.
Placeholder a_buf("a", kFloat, {M, N});
Placeholder b_buf("b", kFloat, {N, K});
- Tensor* x = Compute(
+ Tensor x = Compute(
"x",
{{M, "m1"}, {N, "n1"}, {K, "k1"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
return a_buf.load(m, n) * b_buf.load(n, k);
});
- Tensor* y = Compute(
+ Tensor y = Compute(
"y",
{{M, "m2"}, {N, "n2"}, {K, "k2"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return Intrinsics::make(kSqrt, x->load(m, n, k));
+ return Intrinsics::make(kSqrt, x.load(m, n, k));
});
PaddedBuffer<float> a_v(M, N);
}
}
- LoopNest l1(std::vector<Tensor*>({y}), {x, y});
+ LoopNest l1({y}, {x, y});
LoopNest l2(l1);
- l2.computeInline(x->buf());
+ l2.computeInline(x.buf());
l1.prepareForCodegen();
l2.prepareForCodegen();
const int N = 5;
const int K = 6;
- Tensor* x = Compute(
+ Tensor x = Compute(
"x",
{{M, "m1"}, {N, "n1"}, {K, "k1"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
return Intrinsics::make(kRand, kFloat);
});
- Tensor* y = Compute(
+ Tensor y = Compute(
"y",
{{M, "m2"}, {N, "n2"}, {K, "k2"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return Intrinsics::make(kSqrt, x->load(m, n, k));
+ return Intrinsics::make(kSqrt, x.load(m, n, k));
});
- LoopNest l1(std::vector<Tensor*>({y}), {x, y});
- l1.computeInline(x->buf());
+ LoopNest l1({y}, {x, y});
+ l1.computeInline(x.buf());
StmtPtr stmt1 = IRSimplifier::simplify(l1.root_stmt());
// Split a Compute then inline it into another compute.
TEST(LoopNest, ScheduleSplitAThenInline) {
KernelScope kernel_scope;
- Tensor* a =
+ Tensor a =
Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; });
- Tensor* b = Compute("b", {{2, "j"}}, [&](const VarHandle& j) {
- return a->load(j + ExprHandle(8));
+ Tensor b = Compute("b", {{2, "j"}}, [&](const VarHandle& j) {
+ return a.load(j + ExprHandle(8));
});
- LoopNest l(std::vector<Tensor*>({b}), {a, b});
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0);
+ LoopNest l({b}, {a, b});
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0);
LoopNest::splitWithMask(loops[0], 4);
- ASSERT_THROWS_WITH(l.computeInline(a->buf()), "compound indices");
+ ASSERT_THROWS_WITH(l.computeInline(a.buf()), "compound indices");
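// After the split, a's store index is the compound expression
// i_outer * 4 + i_inner, which computeInline cannot invert back to a single
// axis, hence the error.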
}
// Split a Compute then inline another Compute into it.
TEST(LoopNest, ScheduleSplitBThenInline) {
KernelScope kernel_scope;
- Tensor* a =
+ Tensor a =
Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; });
- Tensor* b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) {
- return a->load(j + ExprHandle(8));
+ Tensor b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) {
+ return a.load(j + ExprHandle(8));
});
- LoopNest l(std::vector<Tensor*>({b}), {a, b});
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(b->buf()).at(0);
+ LoopNest l({b}, {a, b});
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(b.buf()).at(0);
LoopNest::splitWithMask(loops[0], 3);
- l.computeInline(a->buf());
+ l.computeInline(a.buf());
l.prepareForCodegen();
StmtPtr s = IRSimplifier::simplify(l.root_stmt());
// Split a Compute twice then inline it.
TEST(LoopNest, ScheduleSplitTwiceThenInline) {
KernelScope kernel_scope;
- Tensor* a =
+ Tensor a =
Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; });
- Tensor* b = Compute("b", {{2, "j"}}, [&](const VarHandle& j) {
- return a->load(j + ExprHandle(8));
+ Tensor b = Compute("b", {{2, "j"}}, [&](const VarHandle& j) {
+ return a.load(j + ExprHandle(8));
});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr i_inner;
- LoopNest l(std::vector<Tensor*>({b}), {a, b});
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0);
+ LoopNest l({b}, {a, b});
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0);
LoopNest::splitWithMask(loops[0], 4, &i_inner);
LoopNest::splitWithMask(i_inner, 2);
- ASSERT_THROWS_WITH(l.computeInline(a->buf()), "compound indices");
+ ASSERT_THROWS_WITH(l.computeInline(a.buf()), "compound indices");
}
// Inline a Compute, then split.
TEST(LoopNest, ScheduleInlineThenSplit) {
KernelScope kernel_scope;
- Tensor* a =
+ Tensor a =
Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; });
- Tensor* b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) {
- return a->load(j + ExprHandle(8));
+ Tensor b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) {
+ return a.load(j + ExprHandle(8));
});
- LoopNest l(std::vector<Tensor*>({b}), {a, b});
- l.computeInline(a->buf());
+ LoopNest l({b}, {a, b});
+ l.computeInline(a.buf());
std::vector<ForPtr> loops = NodeFinder<For>::find(l.root_stmt());
LoopNest::splitWithMask(loops.back(), 3);
// Split a Compute, inline it, then split the result.
TEST(LoopNest, ScheduleSplitInlineThenSplit) {
KernelScope kernel_scope;
- Tensor* a =
+ Tensor a =
Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; });
- Tensor* b = Compute("b", {{16, "j"}}, [&](const VarHandle& j) {
- return a->load(j + ExprHandle(8));
+ Tensor b = Compute("b", {{16, "j"}}, [&](const VarHandle& j) {
+ return a.load(j + ExprHandle(8));
});
- LoopNest l(std::vector<Tensor*>({b}), {a, b});
+ LoopNest l({b}, {a, b});
auto loops = NodeFinder<For>::find(l.root_stmt());
LoopNest::splitWithMask(loops.back(), 2);
- l.computeInline(a->buf());
+ l.computeInline(a.buf());
loops = NodeFinder<For>::find(l.root_stmt());
LoopNest::splitWithMask(loops.front(), 2);
// Oversplit a loop that is simplified out after inlining.
TEST(LoopNest, ScheduleSplitInlineSimplify) {
KernelScope kernel_scope;
- Tensor* a = Compute("a", {{18, "i"}}, [&](const VarHandle& i) {
+ Tensor a = Compute("a", {{18, "i"}}, [&](const VarHandle& i) {
return ExprHandle(4) * i - ExprHandle(2) * i;
});
- Tensor* b = Compute("b", {{2, "j"}}, [&](const VarHandle& j) {
- return a->load(j) - ExprHandle(1);
+ Tensor b = Compute("b", {{2, "j"}}, [&](const VarHandle& j) {
+ return a.load(j) - ExprHandle(1);
});
- LoopNest l(std::vector<Tensor*>({b}), {a, b});
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0);
+ LoopNest l({b}, {a, b});
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0);
LoopNest::splitWithMask(loops[0], 4);
- ASSERT_THROWS_WITH(l.computeInline(a->buf()), "compound indices");
+ ASSERT_THROWS_WITH(l.computeInline(a.buf()), "compound indices");
}
// Inline a Compute with two consumers.
TEST(LoopNest, ScheduleInlineThreeMixedOnce) {
KernelScope kernel_scope;
- Tensor* a =
+ Tensor a =
Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; });
- Tensor* b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) {
- return a->load(j + ExprHandle(8));
+ Tensor b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) {
+ return a.load(j + ExprHandle(8));
});
- Tensor* c = Compute(
+ Tensor c = Compute(
"c", {{4, "k"}, {3, "l"}}, [&](const VarHandle& k, const VarHandle& l) {
- return a->load(k) * b->load(l);
+ return a.load(k) * b.load(l);
});
- LoopNest l(std::vector<Tensor*>({c}), {a, b, c});
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0);
- l.computeInline(a->buf());
+ LoopNest l({c}, {a, b, c});
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0);
+ l.computeInline(a.buf());
l.prepareForCodegen();
StmtPtr s = IRSimplifier::simplify(l.root_stmt());
// Inline Compute A into B, then inline B into C.
TEST(LoopNest, ScheduleInlineThreeMixedTwice) {
KernelScope kernel_scope;
- Tensor* a =
+ Tensor a =
Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; });
- Tensor* b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) {
- return a->load(j + ExprHandle(8));
+ Tensor b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) {
+ return a.load(j + ExprHandle(8));
});
- Tensor* c = Compute(
+ Tensor c = Compute(
"c", {{4, "k"}, {3, "l"}}, [&](const VarHandle& k, const VarHandle& l) {
- return a->load(k) * b->load(l);
+ return a.load(k) * b.load(l);
});
- LoopNest l(std::vector<Tensor*>({c}), {a, b, c});
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0);
- l.computeInline(a->buf());
- l.computeInline(b->buf());
+ LoopNest l({c}, {a, b, c});
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0);
+ l.computeInline(a.buf());
+ l.computeInline(b.buf());
l.prepareForCodegen();
StmtPtr s = IRSimplifier::simplify(l.root_stmt());
// Inline a Compute that is both a producer and consumer.
TEST(LoopNest, ScheduleInlineThreeMixedInner) {
KernelScope kernel_scope;
- Tensor* a =
+ Tensor a =
Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; });
- Tensor* b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) {
- return a->load(j + ExprHandle(8));
+ Tensor b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) {
+ return a.load(j + ExprHandle(8));
});
- Tensor* c = Compute(
+ Tensor c = Compute(
"c", {{4, "k"}, {3, "l"}}, [&](const VarHandle& k, const VarHandle& l) {
- return a->load(k) * b->load(l);
+ return a.load(k) * b.load(l);
});
- LoopNest l(std::vector<Tensor*>({c}), {a, b, c});
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0);
- l.computeInline(b->buf());
+ LoopNest l({c}, {a, b, c});
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0);
+ l.computeInline(b.buf());
l.prepareForCodegen();
StmtPtr s = IRSimplifier::simplify(l.root_stmt());
// Split 3 Computes, then inline the first two into the last.
TEST(LoopNest, ScheduleInlineThreeMixedSplit) {
KernelScope kernel_scope;
- Tensor* a =
+ Tensor a =
Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; });
- Tensor* b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) {
- return a->load(j + ExprHandle(8));
+ Tensor b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) {
+ return a.load(j + ExprHandle(8));
});
- Tensor* c = Compute(
+ Tensor c = Compute(
"c", {{4, "k"}, {3, "l"}}, [&](const VarHandle& k, const VarHandle& l) {
- return a->load(k) * b->load(l);
+ return a.load(k) * b.load(l);
});
- LoopNest l(std::vector<Tensor*>({c}), {a, b, c});
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0);
+ LoopNest l({c}, {a, b, c});
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0);
LoopNest::splitWithMask(loops[0], 4);
- loops = l.getAllLoopNestsWritingToBuf(b->buf()).at(0);
+ loops = l.getAllLoopNestsWritingToBuf(b.buf()).at(0);
LoopNest::splitWithMask(loops[0], 3);
- loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0);
+ loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0);
LoopNest::splitWithMask(loops[0], 2);
- ASSERT_THROWS_WITH(l.computeInline(a->buf()), "compound indices");
+ ASSERT_THROWS_WITH(l.computeInline(a.buf()), "compound indices");
}
// Check that inlining works for output tensors too.
const int N = 5;
const int K = 6;
- Tensor* x = Compute(
+ Tensor x = Compute(
"x",
{{M, "m1"}, {N, "n1"}, {K, "k1"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
return m * n * k;
});
- Tensor* y = Compute(
+ Tensor y = Compute(
"y",
{{M, "m2"}, {N, "n2"}, {K, "k2"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return x->load(m, n, k) + m;
+ return x.load(m, n, k) + m;
});
LoopNest l1({x, y});
- l1.computeInline(x->buf());
+ l1.computeInline(x.buf());
// would normally compare results but Rand isn't implemented in the
// SimpleIREvaluator, even if we could seed it.
Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Tensor* b = Compute(
+ Tensor b = Compute(
"f", {{kTotalSize, "i"}}, [&](const std::vector<VarHandle>& axes) {
return a_buf.load(axes[0]) + 11.0f;
});
- Tensor* c = Compute(
+ Tensor c = Compute(
"g", {{kTotalSize, "i"}}, [&](const std::vector<VarHandle>& axes) {
- return b->load(axes[0]) + 1.0f;
+ return b.load(axes[0]) + 1.0f;
});
LoopNest l({b, c});
Placeholder c(BufHandle("C", {ExprHandle(kTotalSize)}, kFloat));
Placeholder d(BufHandle("D", {ExprHandle(kTotalSize)}, kFloat));
- Tensor* e = Compute("e", {{kTotalSize, "i"}}, [&](const VarHandle& i) {
+ Tensor e = Compute("e", {{kTotalSize, "i"}}, [&](const VarHandle& i) {
return a.load(i) + b.load(i);
});
- Tensor* f = Compute("f", {{kTotalSize, "i"}}, [&](const VarHandle& i) {
- return e->load(i) + c.load(i);
+ Tensor f = Compute("f", {{kTotalSize, "i"}}, [&](const VarHandle& i) {
+ return e.load(i) + c.load(i);
});
- Tensor* g = Compute("g", {{kTotalSize, "i"}}, [&](const VarHandle& i) {
- return f->load(i) + d.load(i);
+ Tensor g = Compute("g", {{kTotalSize, "i"}}, [&](const VarHandle& i) {
+ return f.load(i) + d.load(i);
});
- LoopNest l(std::vector<Tensor*>({g}), {e, f, g});
+ LoopNest l({g}, {e, f, g});
l.computeInline(l.getLoopBodyFor(e));
l.computeInline(l.getLoopBodyFor(f));
l.prepareForCodegen();
VarHandle n("n", kInt);
Placeholder a(BufHandle("a", {m, n}, kFloat));
Placeholder b(BufHandle("b", {m, n}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"c", {{m, "m"}, {n, "n"}}, [&](const VarHandle& i, const VarHandle& j) {
return a.load(i, j) + b.load(i, j);
});
// and the temp should be used in B.
KernelScope kernel_scope;
VarHandle N("N", kInt);
- Tensor* A = Compute(
+ Tensor A = Compute(
"A", {{N, "i_a"}}, [&](const VarHandle& i_a) { return i_a * i_a; });
- Tensor* B = Compute(
- "B", {{N, "i_b"}}, [&](const VarHandle& i_b) { return A->load(i_b); });
- LoopNest l(std::vector<Tensor*>({B}), {A, B});
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(B->buf()).at(0);
+ Tensor B = Compute(
+ "B", {{N, "i_b"}}, [&](const VarHandle& i_b) { return A.load(i_b); });
+ LoopNest l({B}, {A, B});
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(B.buf()).at(0);
LoopNest::computeAt(l.getLoopBodyFor(A), loops[0]);
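// computeAt moves the computation of A inside the given loop of B,
// materializing only the elements each iteration consumes into a temp
// buffer.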
l.prepareForCodegen();
StmtPtr s = l.root_stmt();
const int kW = 16, kH = 16;
VarHandle W("W", kInt);
VarHandle H("H", kInt);
- Tensor* p = Compute(
+ Tensor p = Compute(
"prod",
{{H + 1, "py"}, {W + 1, "px"}},
[&](const VarHandle& py, const VarHandle& px) { return px * py; });
- Tensor* c = Compute(
+ Tensor c = Compute(
"cons",
{{H, "cy"}, {W, "cx"}},
[&](const VarHandle& y, const VarHandle& x) {
- return p->load(y, x) + p->load(y + 1, x) + p->load(y, x + 1) +
- p->load(y + 1, x + 1);
+ return p.load(y, x) + p.load(y + 1, x) + p.load(y, x + 1) +
+ p.load(y + 1, x + 1);
});
std::vector<int> c_ref(kW * kH, 0);
c_ref[y * kW + x] = y * x + (y + 1) * x + y * (x + 1) + (y + 1) * (x + 1);
}
}
- LoopNest orig_loopnest(std::vector<Tensor*>({c}), {p, c});
+ LoopNest orig_loopnest({c}, {p, c});
{
// First let's try to compute P at axis cy (the outer loop)
LoopNest l(orig_loopnest);
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0);
LoopNest::computeAt(l.getLoopBodyFor(p), loops[0]);
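// Computing the producer at the outer loop should materialize roughly a
// 2 x (W + 1) slab of prod per cy iteration; computing at cx below shrinks
// that to a 2 x 2 patch.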
l.prepareForCodegen();
StmtPtr s = l.root_stmt();
{
// Now let's try to compute P at axis cx (the inner loop)
LoopNest l(orig_loopnest);
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0);
LoopNest::computeAt(l.getLoopBodyFor(p), loops[1]);
l.prepareForCodegen();
StmtPtr s = l.root_stmt();
const int kW = 16, kH = 16;
VarHandle W("W", kInt);
VarHandle H("H", kInt);
- Tensor* A = Compute(
+ Tensor A = Compute(
"A",
{{H + 1, "ay"}, {W + 1, "ax"}},
[&](const VarHandle& ay, const VarHandle& ax) { return ax * ay; });
- Tensor* B = Compute(
+ Tensor B = Compute(
"B",
{{H + 1, "by"}, {W + 1, "bx"}},
- [&](const VarHandle& by, const VarHandle& bx) {
- return A->load(by, bx);
- });
- Tensor* C = Compute(
+ [&](const VarHandle& by, const VarHandle& bx) { return A.load(by, bx); });
+ Tensor C = Compute(
"C",
{{H, "cy"}, {W, "cx"}},
[&](const VarHandle& cy, const VarHandle& cx) {
- return B->load(cy, cx + 1);
+ return B.load(cy, cx + 1);
});
- Tensor* D = Compute(
+ Tensor D = Compute(
"D",
{{H, "dy"}, {W, "dx"}},
[&](const VarHandle& dy, const VarHandle& dx) {
- return A->load(dy + 1, dx) + C->load(dy, dx);
+ return A.load(dy + 1, dx) + C.load(dy, dx);
});
std::vector<int> c_ref(kW * kH, 0);
}
}
- LoopNest orig_loopnest(std::vector<Tensor*>({D}), {A, B, C, D});
+ LoopNest orig_loopnest({D}, {A, B, C, D});
{
// First let's try to compute A at axis dy (the outer loop)
LoopNest l(orig_loopnest);
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(D->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(D.buf()).at(0);
LoopNest::computeAt(l.getLoopBodyFor(A), loops[0]);
l.prepareForCodegen();
StmtPtr s = l.root_stmt();
{
// Now let's try to compute A at axis dx (the inner loop)
LoopNest l(orig_loopnest);
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(D->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(D.buf()).at(0);
LoopNest::computeAt(l.getLoopBodyFor(A), loops[1]);
l.prepareForCodegen();
StmtPtr s = l.root_stmt();
VarHandle W("W", kInt);
VarHandle H("H", kInt);
- Tensor* p =
+ Tensor p =
Compute("prod", {{H + 1, "py"}, {W + 1, "px"}}, [&](Axis py, Axis px) {
return px * py;
});
- Tensor* c = Reduce(
+ Tensor c = Reduce(
"cons",
{{H, "cy"}, {W, "cx"}},
Sum(),
- [&](Axis y, Axis x, Axis r, Axis s) { return p->load(y + r, x + s); },
+ [&](Axis y, Axis x, Axis r, Axis s) { return p.load(y + r, x + s); },
{{2, "r"}, {2, "s"}});
std::vector<int> c_ref(kW * kH, 0);
c_ref[y * kW + x] = y * x + (y + 1) * x + y * (x + 1) + (y + 1) * (x + 1);
}
}
- LoopNest orig_loopnest(std::vector<Tensor*>({c}), {p, c});
+ LoopNest orig_loopnest({c}, {p, c});
checkIR(orig_loopnest.root_stmt(), R"IR(
# CHECK: for (int py = 0; py < H + 1; py++) {
# CHECK: for (int px = 0; px < W + 1; px++) {
{
// First let's try to compute P at axis cy (the outer loop)
LoopNest l(orig_loopnest);
- auto loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0);
+ auto loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0);
LoopNest::computeAt(l.getLoopBodyFor(p), loops[0]);
// FIXME: Calling simplify here breaks the IR:
// MALFORMED INPUT: could not find base node in Load - temp[...]
{
// Now let's try to compute P at axis cx (the inner loop)
LoopNest l(orig_loopnest);
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0);
LoopNest::computeAt(l.getLoopBodyFor(p), loops[1]);
l.simplify();
l.eliminateDeadStores();
int Pad = 1;
Placeholder IP("input", kFloat, {H});
- Tensor* A =
+ Tensor A =
Compute("A", {{N, "np"}, {H + 2 * Pad, "hp"}}, [&](Axis n, Axis h) {
auto cond = CompareSelect::make(h, Pad, 1, 0, kLT);
cond = CompareSelect::make(h, H + Pad, 1, cond, kGE);
return ifThenElse(cond, 0.f, IP.load(n, h - Pad));
});
- Tensor* B = Reduce(
+ Tensor B = Reduce(
"B",
{{N, "n"}, {H, "h"}},
Sum(),
- [&](Axis n, Axis h, Axis r) { return A->load(n, h + r); },
+ [&](Axis n, Axis h, Axis r) { return A.load(n, h + r); },
{{R, "r"}});
LoopNest l({B});
checkIR(l.root_stmt(), R"IR(
# CHECK: }
# CHECK: }
)IR");
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(B->buf()).at(0);
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(B.buf()).at(0);
LoopNest::computeAt(l.getLoopBodyFor(A), loops[0]);
// FIXME: The current IR is totally broken. The body of the inlined loop is:
TEST(LoopNest, LoopNestReorderAxis1) {
KernelScope kernel_scope;
- Tensor* tensor = Compute(
+ Tensor tensor = Compute(
"f", {{2, "x"}, {3, "y"}}, [](const VarHandle& x, const VarHandle& y) {
return ExprHandle(1.0f) + cast<float>(x) * x + cast<float>(y) * y;
});
SimpleIREvaluator cg(stmt1, {tensor});
cg.call({stmt1_output});
- auto loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ auto loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::reorderAxis(loops[0], loops[1]);
StmtPtr stmt2 = Stmt::clone(l.root_stmt());
}
// Reorder them back.
- loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::reorderAxis(loops[0], loops[1]);
StmtPtr stmt3 = l.root_stmt();
TEST(LoopNest, LoopNestReorderPartialAxes) {
KernelScope kernel_scope;
- Tensor* tensor = Compute(
+ Tensor tensor = Compute(
"f",
{{2, "x"}, {3, "y"}, {4, "z"}},
[](const VarHandle& x, const VarHandle& y, const VarHandle& z) {
SimpleIREvaluator cg(stmt1, {tensor});
cg.call({stmt1_output});
- auto loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ auto loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::reorderAxis(loops[0], loops[1]);
ASSERT_EQ(loopOrderHelper.getOrder(l.root_stmt()), "y,x,z,");
ASSERT_EQ(stmt1_output[i], stmt2_output[i]);
}
- loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::reorderAxis(loops[1], loops[2]);
ASSERT_EQ(loopOrderHelper.getOrder(l.root_stmt()), "y,z,x,");
TEST(LoopNest, LoopNestReorderInternalAxis) {
KernelScope kernel_scope;
- Tensor* tensor = Compute(
+ Tensor tensor = Compute(
"f",
{{1, "w"}, {2, "x"}, {3, "y"}, {4, "z"}},
[](const VarHandle& w,
SimpleIREvaluator cg(stmt1, {tensor});
cg.call({stmt1_output});
- auto loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ auto loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::reorderAxis(loops[2], loops[1]);
ASSERT_EQ(loopOrderHelper.getOrder(l.root_stmt()), "w,y,x,z,");
TEST(LoopNest, LoopNestReorderEnclosingAxis) {
KernelScope kernel_scope;
- Tensor* tensor = Compute(
+ Tensor tensor = Compute(
"f",
{{1, "w"}, {2, "x"}, {3, "y"}, {4, "z"}},
[](const VarHandle& w,
SimpleIREvaluator cg(stmt1, {tensor});
cg.call({stmt1_output});
- auto loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ auto loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::reorderAxis(loops[0], loops[3]);
ASSERT_EQ(loopOrderHelper.getOrder(l.root_stmt()), "z,x,y,w,");
TEST(LoopNest, LoopNestReorderSameAxis) {
KernelScope kernel_scope;
- Tensor* tensor = Compute(
+ Tensor tensor = Compute(
"f", {{2, "x"}, {3, "y"}}, [](const VarHandle& x, const VarHandle& y) {
return ExprHandle(1.0f) + cast<float>(x) * x + cast<float>(y) * y;
});
LoopNest l({tensor});
StmtPtr stmt1 = Stmt::clone(l.root_stmt());
- auto loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ auto loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::reorderAxis(loops[1], loops[1]);
StmtPtr stmt2 = Stmt::clone(l.root_stmt());
KernelScope kernel_scope;
- Tensor* tensor = Compute(
+ Tensor tensor = Compute(
"f",
{{2, "x"}, {3, "y"}, {4, "z"}},
[](const VarHandle& x, const VarHandle& y, const VarHandle& z) {
Placeholder extra(BufHandle("res", {6, 3}, kFloat));
- auto loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ auto loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
VarHandle i = VarHandle(loops[0]->var());
*/
- loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0);
+ loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0);
LoopNest::reorderAxis(loops[0], loops[2]);
StmtPtr stmt3 = Stmt::clone(l.root_stmt());
int index2) {
KernelScope kernel_scope;
- Tensor* c = Compute(
+ Tensor c = Compute(
"5d",
{{2, "a"}, {3, "b"}, {2, "c"}, {3, "d"}, {2, "e"}},
[](const std::vector<VarHandle>&) { return -1; });
Placeholder extra(BufHandle("extra", {5}, kInt));
- auto loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0);
+ auto loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0);
int j = 0;
for (auto l : loops) {
// Add an increment at each layer of the loop which counts the number of
ASSERT_EQ(extra1[i], expected_loops);
}
- loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0);
+ loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0);
LoopNest::reorderAxis(loops[index1], loops[index2]);
StmtPtr stmt2 = Stmt::clone(l.root_stmt());
Placeholder c_buf("c", kFloat, {M, N});
Placeholder d_buf("d", kFloat, {M, K});
- Tensor* x = Compute(
+ Tensor x = Compute(
"x",
{{M, "m1"}, {N, "n1"}, {K, "k1"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
return a_buf.load(m, n) * b_buf.load(n, k);
});
- Tensor* y = Compute(
+ Tensor y = Compute(
"y",
{{M, "m2"}, {N, "n2"}, {K, "k2"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return c_buf.load(m, n) * d_buf.load(m, k) + x->load(m, n, k);
+ return c_buf.load(m, n) * d_buf.load(m, k) + x.load(m, n, k);
});
- Tensor* z = Compute(
+ Tensor z = Compute(
"z",
{{M, "m3"}, {N, "n3"}, {K, "k3"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return x->load(m, n, k) + y->load(m, n, k);
+ return x.load(m, n, k) + y.load(m, n, k);
});
- LoopNest l(std::vector<Tensor*>({z}), {x, y, z});
+ LoopNest l({z}, {x, y, z});
ForPtr a = nullptr;
ForPtr b = nullptr;
auto fors = NodeFinder<For>::find(l.root_stmt());
TEST(LoopNest, OuterLoopVectorization) {
KernelScope kernel_scope;
- Tensor* tensor = Compute(
+ Tensor tensor = Compute(
"f", {{8, "X"}, {8, "y"}}, [](const VarHandle& x, const VarHandle& y) {
return ExprHandle(1.0f) + cast<float>(x) * x + cast<float>(y) * y;
});
LoopNest l({tensor});
ASSERT_TRUE(
- LoopNest::vectorize(l.getAllLoopNestsWritingToBuf(tensor->buf())[0][0]));
+ LoopNest::vectorize(l.getAllLoopNestsWritingToBuf(tensor.buf())[0][0]));
StmtPtr root_stmt = l.root_stmt();
BlockPtr outer_block = to<Block>(root_stmt);
std::string constantUpperBoundLoopIR(int upper_bound_val) {
KernelScope kernel_scope;
ExprHandle upper_bound(upper_bound_val);
- Tensor* A = Compute(
+ Tensor A = Compute(
"A", {{upper_bound, "x"}}, [&](const VarHandle& x) { return x * 2; });
LoopNest l({A});
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(A->buf())[0];
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(A.buf())[0];
StmtPtr unrolled = nullptr;
LoopNest::unroll(loops[0], &unrolled);
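// unroll requires a constant trip count; the loop is replaced by
// upper_bound_val sequential copies of its body with x substituted.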
std::ostringstream oss;
KernelScope kernel_scope;
ExprHandle outer_bound(3);
ExprHandle inner_bound(4);
- Tensor* A = Compute(
+ Tensor A = Compute(
"A",
{{outer_bound, "x"}, {inner_bound, "y"}},
[&](const VarHandle& x, const VarHandle& y) { return x + y; });
LoopNest l({A});
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(A->buf())[0];
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(A.buf())[0];
StmtPtr unrolled = nullptr;
LoopNest::unroll(loops[0], &unrolled);
checkIR(unrolled, R"IR(
KernelScope kernel_scope;
ExprHandle outer_bound(3);
ExprHandle inner_bound(4);
- Tensor* A = Compute(
+ Tensor A = Compute(
"A",
{{outer_bound, "x"}, {inner_bound, "y"}},
[&](const VarHandle& x, const VarHandle& y) { return x + y; });
LoopNest l({A});
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(A->buf())[0];
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(A.buf())[0];
StmtPtr unrolled = nullptr;
LoopNest::unroll(
static_to<For>(loops[0]->body()->stmts().front()), &unrolled);
TEST(LoopNest, NoUnroll) {
KernelScope kernel_scope;
VarHandle upper_bound("N", kInt);
- Tensor* A = Compute(
+ Tensor A = Compute(
"A", {{upper_bound, "x"}}, [&](const VarHandle& x) { return x * 2; });
LoopNest l({A});
- std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(A->buf())[0];
+ std::vector<ForPtr> loops = l.getAllLoopNestsWritingToBuf(A.buf())[0];
StmtPtr unrolled = nullptr;
ASSERT_THROWS_WITH(
LoopNest::unroll(loops[0], &unrolled), "non-constant loop");
// Create a dummy tensor to construct LoopNest.
ExprHandle n(100);
Placeholder a(BufHandle("a", {n}, kFloat));
- Tensor* b =
+ Tensor b =
Compute("b", {{n, "i"}}, [&](const VarHandle& i) { return a.load(i); });
LoopNest l({b});
VarHandle m("m", kInt);
VarHandle n("n", kInt);
Placeholder b(BufHandle("b", {m, n}, kFloat));
- Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}});
+ Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}});
LoopNest loop({c});
HashProvider hasher;
auto hash_before = hasher.hash(loop.root_stmt());
- auto loops = loop.getAllLoopNestsWritingToBuf(c->buf())[1];
+ auto loops = loop.getAllLoopNestsWritingToBuf(c.buf())[1];
ForPtr flattened = nullptr;
ASSERT_FALSE(LoopNest::flatten(loops, &flattened));
ASSERT_EQ(flattened, nullptr);
const int kTotalSize = 8;
Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
- Tensor* a = Compute("a", {{kTotalSize, "i"}}, [&](const VarHandle& i) {
+ Tensor a = Compute("a", {{kTotalSize, "i"}}, [&](const VarHandle& i) {
return a_buf.load(i);
});
- Tensor* reshape = Compute(
+ Tensor reshape = Compute(
"reshape",
{{kTotalSize / 2, "i"}, {2, "j"}},
- [&](const VarHandle& i, const VarHandle& j) { return a->load(i, j); });
- LoopNest l(std::vector<Tensor*>({reshape}), {a, reshape});
+ [&](const VarHandle& i, const VarHandle& j) { return a.load(i, j); });
+ LoopNest l({reshape}, {a, reshape});
ASSERT_THROWS_WITH(
l.computeInline(l.getLoopBodyFor(a)),
"Placeholder indexed access is inconsistent with its rank");
TEST(LoopNest, CacheReadsSimple) {
KernelScope kernel_scope;
- Tensor* A = Compute(
+ Tensor A = Compute(
"A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) {
return i * j;
});
- Tensor* B = Compute(
+ Tensor B = Compute(
"B", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i + 30, j + 3);
+ return A.load(i + 30, j + 3);
});
- Tensor* C = Compute(
+ Tensor C = Compute(
"C", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i + 10, j + 20) + A->load(i + 30, j + 40);
+ return A.load(i + 10, j + 20) + A.load(i + 30, j + 40);
});
- LoopNest l(std::vector<Tensor*>({B, C}), {A, B, C});
- StmtPtr j_loop = l.getAllLoopNestsWritingToBuf(B->buf())[0][1];
- LoopNest::cacheAccesses(A->buf(), "A_local", j_loop);
+ LoopNest l({B, C}, {A, B, C});
+ StmtPtr j_loop = l.getAllLoopNestsWritingToBuf(B.buf())[0][1];
+ LoopNest::cacheAccesses(A.buf(), "A_local", j_loop);
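// cacheAccesses allocates a local buffer A_local at j_loop, fills it from
// A, and redirects B's loads within that loop to the cache.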
l.prepareForCodegen();
StmtPtr result = IRSimplifier::simplify(l.root_stmt());
TEST(LoopNest, CacheReadsOuter) {
KernelScope kernel_scope;
- Tensor* A = Compute(
+ Tensor A = Compute(
"A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) {
return i * j;
});
- Tensor* B = Compute(
+ Tensor B = Compute(
"B", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i + 30, j + 40) + A->load(i + 31, j + 41);
+ return A.load(i + 30, j + 40) + A.load(i + 31, j + 41);
});
- Tensor* C = Compute(
+ Tensor C = Compute(
"C", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i + 10, j + 20) + A->load(i + 30, j + 40);
+ return A.load(i + 10, j + 20) + A.load(i + 30, j + 40);
});
- LoopNest l(std::vector<Tensor*>({B, C}), {A, B, C});
- StmtPtr i_loop = l.getAllLoopNestsWritingToBuf(B->buf())[0][0];
- LoopNest::cacheAccesses(A->buf(), "A_local", i_loop);
+ LoopNest l({B, C}, {A, B, C});
+ StmtPtr i_loop = l.getAllLoopNestsWritingToBuf(B.buf())[0][0];
+ LoopNest::cacheAccesses(A.buf(), "A_local", i_loop);
l.prepareForCodegen();
StmtPtr result = IRSimplifier::simplify(l.root_stmt());
TEST(LoopNest, CacheReadsInternal) {
KernelScope kernel_scope;
- Tensor* A = Compute(
+ Tensor A = Compute(
"A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) {
return i * j;
});
- Tensor* B = Compute(
+ Tensor B = Compute(
"B", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i + 30, j + 40) + A->load(i + 31, j + 41);
+ return A.load(i + 30, j + 40) + A.load(i + 31, j + 41);
});
- Tensor* C = Compute(
+ Tensor C = Compute(
"C", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i + 10, j + 20) + A->load(i + 30, j + 40);
+ return A.load(i + 10, j + 20) + A.load(i + 30, j + 40);
});
- LoopNest l(std::vector<Tensor*>({B, C}), {A, B, C});
- StmtPtr j_loop = l.getAllLoopNestsWritingToBuf(B->buf())[0][1];
- LoopNest::cacheAccesses(A->buf(), "A_local", j_loop);
+ LoopNest l({B, C}, {A, B, C});
+ StmtPtr j_loop = l.getAllLoopNestsWritingToBuf(B.buf())[0][1];
+ LoopNest::cacheAccesses(A.buf(), "A_local", j_loop);
l.prepareForCodegen();
StmtPtr result = IRSimplifier::simplify(l.root_stmt());
TEST(LoopNest, CacheReadsInner) {
KernelScope kernel_scope;
- Tensor* A = Compute(
+ Tensor A = Compute(
"A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) {
return i * j;
});
// Note: I'm changing the offset of the first argument of the first call to A.
- Tensor* B = Compute(
+ Tensor B = Compute(
"B", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i + 34, j + 40) + A->load(i + 30, j + 41);
+ return A.load(i + 34, j + 40) + A.load(i + 30, j + 41);
});
- Tensor* C = Compute(
+ Tensor C = Compute(
"C", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i + 10, j + 20) + A->load(i + 30, j + 40);
+ return A.load(i + 10, j + 20) + A.load(i + 30, j + 40);
});
- LoopNest l(std::vector<Tensor*>({B, C}), {A, B, C});
+ LoopNest l({B, C}, {A, B, C});
StmtPtr body = l.getLoopBodyFor(B);
- LoopNest::cacheAccesses(A->buf(), "A_local", body);
+ LoopNest::cacheAccesses(A.buf(), "A_local", body);
l.prepareForCodegen();
StmtPtr result = IRSimplifier::simplify(l.root_stmt());
TEST(LoopNest, CacheWritesSimple) {
KernelScope kernel_scope;
- Tensor* A = Compute(
+ Tensor A = Compute(
"A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) {
return i * j;
});
- Tensor* B = Compute(
+ Tensor B = Compute(
"B", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i + 30, j + 40) + A->load(i + 31, j + 41);
+ return A.load(i + 30, j + 40) + A.load(i + 31, j + 41);
});
- Tensor* C = Compute(
+ Tensor C = Compute(
"C", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i + 10, j + 20) + A->load(i + 30, j + 40);
+ return A.load(i + 10, j + 20) + A.load(i + 30, j + 40);
});
- LoopNest l(std::vector<Tensor*>({B, C}), {A, B, C});
- StmtPtr a_loop = l.getAllLoopNestsWritingToBuf(A->buf())[0][1];
- LoopNest::cacheAccesses(A->buf(), "A_local", a_loop);
+ LoopNest l({B, C}, {A, B, C});
+ StmtPtr a_loop = l.getAllLoopNestsWritingToBuf(A.buf())[0][1];
+ LoopNest::cacheAccesses(A.buf(), "A_local", a_loop);
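// Here the cached buffer is the one being written: the computation fills
// A_local and the results are flushed back to A after the cached loop.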
l.prepareForCodegen();
StmtPtr result = IRSimplifier::simplify(l.root_stmt());
auto outer_for2 = For::make(x, 0, 10, inner_for2);
BlockPtr body = Block::make({outer_for1, outer_for2});
- Tensor* A = new Tensor(a_buf.node(), body);
+ Tensor A(a_buf.node(), body);
LoopNest l({A});
l.prepareForCodegen();
KernelScope kernel_scope;
const int N = 10;
Placeholder x_buf("a", kFloat, {1, N, 1});
- Tensor* y = Compute(
+ Tensor y = Compute(
"f",
{{1, "m"}, {N, "n"}, {1, "o"}},
[&](const ExprHandle& m, const ExprHandle& n, const ExprHandle& o) {
return x_buf.load(m, n, o);
});
- Tensor* z = Compute(
+ Tensor z = Compute(
"f",
{{1, "m"}, {N, "n"}, {1, "o"}},
[&](const ExprHandle& m, const ExprHandle& n, const ExprHandle& o) {
- return y->load(m, n, o);
+ return y.load(m, n, o);
});
- LoopNest l(std::vector<Tensor*>({z}), {y, z});
+ LoopNest l({z}, {y, z});
l.simplify();
- ASSERT_TRUE(l.computeInline(y->buf()));
+ ASSERT_TRUE(l.computeInline(y.buf()));
}
TEST(LoopNest, CompoundTensorUsed) {
auto outer_for2 = For::make(x, 0, 10, inner_for2);
BlockPtr body = Block::make({outer_for1, outer_for2});
- Tensor* A = new Tensor(a_buf.node(), body);
- Tensor* B = Compute(
+ Tensor A(a_buf.node(), body);
+ Tensor B = Compute(
"B", {{10, "i"}, {3, "j"}}, [&](const VarHandle& i, const VarHandle& j) {
- return A->load(i, j + 1) + A->load(i, j + 2);
+ return A.load(i, j + 1) + A.load(i, j + 2);
});
- LoopNest l(std::vector<Tensor*>({B}), {A, B});
- ASSERT_FALSE(l.computeInline(A->buf()));
+ LoopNest l({B}, {A, B});
+ ASSERT_FALSE(l.computeInline(A.buf()));
l.prepareForCodegen();
std::vector<int> a_data(50, 0);
ASSERT_EQ(hash_before, hash_after);
}
-static std::pair<std::unique_ptr<Placeholder>, Tensor*> colReduce(
- int M,
- int N) {
+static std::pair<std::unique_ptr<Placeholder>, Tensor> colReduce(int M, int N) {
auto a =
std::make_unique<Placeholder>("a", kFloat, std::vector<ExprHandle>{M, N});
- Tensor* t = Reduce(
+ Tensor t = Reduce(
"b",
{{N, "n"}},
Sum(),
return {std::move(a), t};
}
-static StmtPtr splitTailReorder(Tensor* b) {
+static StmtPtr splitTailReorder(Tensor b) {
constexpr int kVectorWidth = 8;
LoopNest nest({b});
- auto loops = nest.getAllLoopNestsWritingToBuf(b->buf())[0];
+ auto loops = nest.getAllLoopNestsWritingToBuf(b.buf())[0];
nest.splitWithTail(loops[0], kVectorWidth);
// Now the loopnests will look like:
//
// Write #2: "b[n_outer * 8 + n_inner] = ReduceOp(...)"
// Loopnest #2: {n_outer, n_inner, m};
// We will have to reorder n_inner and m.
- auto loopnests = nest.getAllLoopNestsWritingToBuf(b->buf());
+ auto loopnests = nest.getAllLoopNestsWritingToBuf(b.buf());
LoopNest::reorderAxis(loopnests[1][1], loopnests[1][2]);
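// After the reorder, the body of write #2 should look roughly like:
//   for (n_outer) for (m) for (n_inner)
//     b[n_outer * 8 + n_inner] = ReduceOp(... a[m, n_outer * 8 + n_inner] ...)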
nest.prepareForCodegen();
return nest.root_stmt();
}
-static StmtPtr splitMaskReorder(Tensor* b) {
+static StmtPtr splitMaskReorder(Tensor b) {
constexpr int kVectorWidth = 8;
LoopNest nest({b});
- auto loops = nest.getAllLoopNestsWritingToBuf(b->buf())[1];
+ auto loops = nest.getAllLoopNestsWritingToBuf(b.buf())[1];
nest.splitWithMask(loops[0], kVectorWidth);
- loops = nest.getAllLoopNestsWritingToBuf(b->buf())[1];
+ loops = nest.getAllLoopNestsWritingToBuf(b.buf())[1];
LoopNest::reorderAxis(loops[1], loops[2]);
nest.prepareForCodegen();
return nest.root_stmt();
}
-static void checkColReduce(StmtPtr s, Placeholder& p, Tensor* t) {
+static void checkColReduce(StmtPtr s, Placeholder& p, Tensor t) {
int M = immediateAs<int>(p.dim(0));
int N = immediateAs<int>(p.dim(1));
PaddedBuffer<float> a(M, N);
KernelScope kernel_scope;
constexpr int N = 8;
Placeholder a("a", kFloat, {N});
- Tensor* b = Compute(
+ Tensor b = Compute(
"b", {{N, "n"}}, [&](const VarHandle& n) { return a.load(n) + 1.0f; });
- Tensor* c = Compute(
- "c", {{N, "n"}}, [&](const VarHandle& n) { return b->load(n) + 2.0f; });
- LoopNest nest(std::vector<Tensor*>({c}), {b, c});
- auto loops = nest.getAllLoopNestsWritingToBuf(b->buf())[0];
+ Tensor c = Compute(
+ "c", {{N, "n"}}, [&](const VarHandle& n) { return b.load(n) + 2.0f; });
+ LoopNest nest({c}, {b, c});
+ auto loops = nest.getAllLoopNestsWritingToBuf(b.buf())[0];
ASSERT_TRUE(LoopNest::vectorize(loops[0]));
- loops = nest.getAllLoopNestsWritingToBuf(c->buf())[0];
+ loops = nest.getAllLoopNestsWritingToBuf(c.buf())[0];
ASSERT_TRUE(LoopNest::vectorize(loops[0]));
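// vectorize substitutes the loop index with a width-8 Ramp, turning the
// loads and stores into vector operations; it only succeeds on flat loops
// with a constant extent, hence the ASSERT_TRUEs.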
nest.prepareForCodegen();
// NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
constexpr int64_t N = 12;
Placeholder a("a", kLong, {N});
- Tensor* b = Compute("b", {{N, "n"}}, [&](const VarHandle& n) {
+ Tensor b = Compute("b", {{N, "n"}}, [&](const VarHandle& n) {
return a.load(n) + LongImm::make(1l);
});
LoopNest nest({b});
// Can determine if 2 loops created by Compute are dependent.
Placeholder a_buf("a", kFloat, {4, 5});
Placeholder b_buf("b", kFloat, {5, 6});
- Tensor* c = Compute(
+ Tensor c = Compute(
"broadcast_add",
{{4, "m"}, {5, "n"}, {6, "k"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
return a_buf.load(m, n) + b_buf.load(n, k);
});
- Tensor* d = Compute(
+ Tensor d = Compute(
"d",
{{4, "m"}, {5, "n"}, {6, "k"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return c->load(m, n, k) + 1;
+ return c.load(m, n, k) + 1;
});
- LoopNest l(std::vector<Tensor*>({d}), {c, d});
+ LoopNest l({d}, {c, d});
- MemDependencyChecker analyzer({a_buf.data(), b_buf.data()}, {d->buf()});
+ MemDependencyChecker analyzer({a_buf.data(), b_buf.data()}, {d.buf()});
l.root_stmt()->accept(&analyzer);
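// The checker is seeded with the graph inputs and outputs; walking the root
// statement records every load and store so dependencies can be queried.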
// Sanity test: Output depends on input.
- ASSERT_TRUE(analyzer.dependsIndirectly(d->buf(), a_buf.data()));
- ASSERT_TRUE(analyzer.dependsIndirectly(d->buf(), b_buf.data()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a_buf.data()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b_buf.data()));
// Second loop depends on first loop.
auto c_loop = l.getLoopStmtsFor(c)[0];
Placeholder a_buf("a", kFloat, {4, 5});
Placeholder b_buf("b", kFloat, {5, 6});
- Tensor* c = Compute(
+ Tensor c = Compute(
"broadcast_add",
{{4, "m"}, {5, "n"}, {6, "k"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
return a_buf.load(m, n) + b_buf.load(n, k);
});
- Tensor* d = Compute(
+ Tensor d = Compute(
"d",
{{4, "m"}, {5, "n"}, {6, "k"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
- return c->load(m, n, k) + 1;
+ return c.load(m, n, k) + 1;
});
- LoopNest l(std::vector<Tensor*>({d}), {c, d});
- l.computeInline(c->buf());
+ LoopNest l({d}, {c, d});
+ l.computeInline(c.buf());
- MemDependencyChecker analyzer({a_buf.data(), b_buf.data()}, {d->buf()});
+ MemDependencyChecker analyzer({a_buf.data(), b_buf.data()}, {d.buf()});
l.root_stmt()->accept(&analyzer);
// Sanity test: Output depends on input.
- ASSERT_TRUE(analyzer.dependsIndirectly(d->buf(), a_buf.data()));
- ASSERT_TRUE(analyzer.dependsIndirectly(d->buf(), b_buf.data()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a_buf.data()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b_buf.data()));
// broadcast_add tensor should not appear in trace at all.
for (auto& wi : analyzer.getHistory()) {
- ASSERT_NE(wi->var(), c->buf()->base_handle());
+ ASSERT_NE(wi->var(), c.buf()->base_handle());
}
}
Placeholder a_buf("a", kFloat, {4, 5});
Placeholder b_buf("b", kFloat, {5, 6});
- Tensor* c = Compute(
+ Tensor c = Compute(
"broadcast_add",
{{4, "m"}, {5, "n"}, {6, "k"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
LoopNest l({c});
- MemDependencyChecker analyzer_before(
- {a_buf.data(), b_buf.data()}, {c->buf()});
+ MemDependencyChecker analyzer_before({a_buf.data(), b_buf.data()}, {c.buf()});
l.root_stmt()->accept(&analyzer_before);
l.splitWithTail(l.getLoopStmtsFor(c)[0], 2);
- MemDependencyChecker analyzer_after({a_buf.data(), b_buf.data()}, {c->buf()});
+ MemDependencyChecker analyzer_after({a_buf.data(), b_buf.data()}, {c.buf()});
StmtPtr stmt = IRSimplifier::simplify(l.root_stmt());
stmt->accept(&analyzer_after);
Placeholder a_buf("a", kFloat, {4, 5});
Placeholder b_buf("b", kFloat, {5, 6});
- Tensor* c = Compute(
+ Tensor c = Compute(
"broadcast_add",
{{4, "m"}, {5, "n"}, {6, "k"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
LoopNest l({c});
- MemDependencyChecker analyzer_before(
- {a_buf.data(), b_buf.data()}, {c->buf()});
+ MemDependencyChecker analyzer_before({a_buf.data(), b_buf.data()}, {c.buf()});
l.root_stmt()->accept(&analyzer_before);
auto loops = l.getLoopStmtsFor(c);
l.reorderAxis(loops[0], loops[1]);
- MemDependencyChecker analyzer_after({a_buf.data(), b_buf.data()}, {c->buf()});
+ MemDependencyChecker analyzer_after({a_buf.data(), b_buf.data()}, {c.buf()});
StmtPtr stmt = IRSimplifier::simplify(l.root_stmt());
stmt->accept(&analyzer_after);
Placeholder a(BufHandle("a", {2, 3, 6}, kFloat));
Placeholder b(BufHandle("b", {2, 3, 6}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"scale",
{{2, "l2"}, {3, "n1"}, {6, "m1"}},
[&](const VarHandle& l, const VarHandle& n, const VarHandle& m) {
return b.load(l, n, m) * a.load(l, n, m);
});
- Tensor* d = Reduce("sum", {{2, "l1"}}, Sum(), c, {{3, "n1"}, {6, "m1"}});
- LoopNest l(std::vector<Tensor*>({d}), {c, d});
+ Tensor d = Reduce("sum", {{2, "l1"}}, Sum(), c, {{3, "n1"}, {6, "m1"}});
+ LoopNest l({d}, {c, d});
- MemDependencyChecker analyzer({a.data(), b.data()}, {d->buf()});
+ MemDependencyChecker analyzer({a.data(), b.data()}, {d.buf()});
l.root_stmt()->accept(&analyzer);
// Sanity test: Output depends on input.
- ASSERT_TRUE(analyzer.dependsIndirectly(d->buf(), a.data()));
- ASSERT_TRUE(analyzer.dependsIndirectly(d->buf(), b.data()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a.data()));
+ ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b.data()));
// Second loop depends on first loop.
auto c_loop = l.getLoopStmtsFor(c)[0];
Placeholder AP(BufHandle("A", {M, K}, kFloat));
Placeholder BP(BufHandle("B", {K, N}, kFloat));
- Tensor* CT = Reduce(
+ Tensor CT = Reduce(
"gemm",
{{M, "M"}, {N, "N"}},
Sum(),
}
{
auto const& loops = loop.getLoopStmtsFor(CT);
- loop.cacheAccesses(CT->buf(), "C_regs", loops[2]);
+ loop.cacheAccesses(CT.buf(), "C_regs", loops[2]);
}
MemDependencyChecker analyzer_unlowered(
stmt->accept(&analyzer_unlowered);
// Outputs depend on inputs.
- ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT->buf(), AP.data()));
- ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT->buf(), BP.data()));
+ ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT.buf(), AP.data()));
+ ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT.buf(), BP.data()));
// The last write to gemm should cover the total bound of the output.
std::shared_ptr<AccessInfo> outputAccess =
- analyzer_unlowered.output(CT->buf());
+ analyzer_unlowered.output(CT.buf());
// A single dependency.
ASSERT_EQ(outputAccess->dependencies().size(), 1);
using namespace torch::jit::tensorexpr;
-using Tensors = std::vector<Tensor*>;
+using Tensors = std::vector<Tensor>;
using Args = std::vector<CodeGen::BufferArg>;
std::unique_ptr<SimpleIREvaluator> compile(
const Args& inputs,
constexpr int N = 16;
Placeholder a("a", kFloat, {M, N});
- Tensor* b = computeSum({a.handle(), dims, false}, c10::kFloat);
+ Tensor b = computeSum({a.handle(), dims, false}, c10::kFloat);
auto cg = compile({a}, {b});
auto at = at::arange(M * N, at::kFloat).view({M, N});
std::vector<float> out(M, -1.f);
- Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {});
+ Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {});
LoopNest loop({c});
loop.prepareForCodegen();
StmtPtr s = loop.root_stmt();
std::vector<float> out(1, -1.f);
- Tensor* c = Reduce("sum", {}, Sum(), b, {});
+ Tensor c = Reduce("sum", {}, Sum(), b, {});
LoopNest loop({c});
loop.prepareForCodegen();
StmtPtr s = loop.root_stmt();
std::vector<float> out(1, -1.f);
- Tensor* c = Reduce("sum", {}, Sum(), b, {{10, "m"}});
+ Tensor c = Reduce("sum", {}, Sum(), b, {{10, "m"}});
LoopNest loop({c});
loop.prepareForCodegen();
StmtPtr s = loop.root_stmt();
std::vector<float> out(M, -1.f);
- Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}});
+ Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}});
LoopNest loop({c});
loop.prepareForCodegen();
StmtPtr s = loop.root_stmt();
Placeholder b(BufHandle("b", {2, 3, m}, kFloat));
- Tensor* c = Reduce("sum", {{2, "l"}, {3, "n"}}, Sum(), b, {{m, "m"}});
+ Tensor c = Reduce("sum", {{2, "l"}, {3, "n"}}, Sum(), b, {{m, "m"}});
LoopNest loop({c});
loop.prepareForCodegen();
StmtPtr s = loop.root_stmt();
ASSERT_EQ(cData[i], expected);
}
- Tensor* d = Reduce("sum2", {{2, "l"}}, Sum(), b, {{3, "n"}, {m, "m"}});
+ Tensor d = Reduce("sum2", {{2, "l"}}, Sum(), b, {{3, "n"}, {m, "m"}});
LoopNest loop2({d});
loop2.prepareForCodegen();
StmtPtr s2 = loop2.root_stmt();
}
// This is the same as just reducing the original result across that axis.
- Placeholder c_buf(BufHandle(c->buf()));
- Tensor* e = Reduce("sum3", {{2, "l"}}, Sum(), c_buf, {{3, "m"}});
+ Placeholder c_buf(BufHandle(c.buf()));
+ Tensor e = Reduce("sum3", {{2, "l"}}, Sum(), c_buf, {{3, "m"}});
LoopNest loop3({e});
loop3.prepareForCodegen();
StmtPtr s3 = loop3.root_stmt();
std::vector<float> in(InputSize, 1.f);
std::vector<float> out(OutputSize, -1.f);
- Tensor* c = Reduce(
+ Tensor c = Reduce(
"sum",
{{2, "a"}, {3, "b"}, {2, "c"}, {3, "d"}, {2, "e"}},
Sum(),
Reducer product(
ExprHandle(1.f), [](ExprHandle a, ExprHandle b) { return a * b; });
- Tensor* c = Reduce("product", {{M, "m"}}, product, b, {{N, "n"}});
+ Tensor c = Reduce("product", {{M, "m"}}, product, b, {{N, "n"}});
LoopNest loop({c});
loop.prepareForCodegen();
StmtPtr s = loop.root_stmt();
in[j] = j;
}
- Tensor* dm1 = Reduce("max", {}, Maximum(kFloat), in_, {{10, "m"}});
+ Tensor dm1 = Reduce("max", {}, Maximum(kFloat), in_, {{10, "m"}});
LoopNest loop({dm1});
loop.prepareForCodegen();
Placeholder in2_(BufHandle("b", {2, 5}, kFloat));
std::vector<float> out2(2, -1.f);
- Tensor* m2d = Reduce("max", {{2, "n"}}, Maximum(kFloat), in2_, {{5, "m"}});
+ Tensor m2d = Reduce("max", {{2, "n"}}, Maximum(kFloat), in2_, {{5, "m"}});
LoopNest loop2({m2d});
loop2.prepareForCodegen();
in[j] = 10 + j;
}
- Tensor* min = Reduce(
+ Tensor min = Reduce(
"min",
{},
Minimum(ExprHandle(minInit)),
return CompareSelect::make(a, 1, 1, b, kEQ);
});
- Tensor* any = Reduce(
+ Tensor any = Reduce(
"anyEqual",
{{4, "i"}},
anyEqSV,
return CompareSelect::make(a, 0, 0, b, kEQ);
});
- Tensor* allGreaterThan = Reduce(
+ Tensor allGreaterThan = Reduce(
"allGreaterThan",
{{4, "i"}},
allGTSV,
}
}
- Tensor* mm = Reduce(
+ Tensor mm = Reduce(
"mm",
{{3, "m"}, {3, "n"}},
Sum(),
std::vector<float> in_rf_(10, -2.f);
std::vector<float> out(1, -1.f);
- Tensor* l1 = Reduce("l1", {{10, "i"}}, Sum(), in, {{10, "j"}});
- Placeholder in_rf(BufHandle(l1->buf()));
+ Tensor l1 = Reduce("l1", {{10, "i"}}, Sum(), in, {{10, "j"}});
+ Placeholder in_rf(BufHandle(l1.buf()));
- Tensor* l2 = Reduce("l2", {}, Sum(), in_rf, {{10, "i"}});
+ Tensor l2 = Reduce("l2", {}, Sum(), in_rf, {{10, "i"}});
LoopNest loop({l1, l2});
loop.prepareForCodegen();
Placeholder a(BufHandle("a", {2, 3}, kFloat));
Placeholder b(BufHandle("b", {2, 3, m}, kFloat));
- Tensor* c = Reduce("sum", {{2, "l1"}, {3, "n1"}}, Sum(), b, {{m, "m1"}});
- Tensor* d = Compute(
+ Tensor c = Reduce("sum", {{2, "l1"}, {3, "n1"}}, Sum(), b, {{m, "m1"}});
+ Tensor d = Compute(
"scale",
{{2, "l2"}, {3, "n1"}},
[&](const VarHandle& l, const VarHandle& n) {
- return c->load(l, n) * a.load(l, n);
+ return c.load(l, n) * a.load(l, n);
});
- LoopNest loop(std::vector<Tensor*>({d}), {c, d});
+ LoopNest loop({d}, {c, d});
loop.prepareForCodegen();
StmtPtr s = loop.root_stmt();
s = IRSimplifier::simplify(s);
Placeholder a(BufHandle("a", {2, 3, m}, kFloat));
Placeholder b(BufHandle("b", {2, 3, m}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"scale",
{{2, "l2"}, {3, "n1"}, {m, "m1"}},
[&](const VarHandle& l, const VarHandle& n, const VarHandle& m) {
return b.load(l, n, m) * a.load(l, n, m);
});
- Tensor* d = Reduce("sum", {{2, "l1"}}, Sum(), c, {{3, "n1"}, {m, "m1"}});
- LoopNest loop(std::vector<Tensor*>({d}), {c, d});
+ Tensor d = Reduce("sum", {{2, "l1"}}, Sum(), c, {{3, "n1"}, {m, "m1"}});
+ LoopNest loop({d}, {c, d});
loop.prepareForCodegen();
StmtPtr s = loop.root_stmt();
s = IRSimplifier::simplify(s);
}
std::vector<float> out(16, -1.f);
- Tensor* tensor = Reduce("sum", {{16, "m"}}, Sum(), in, {{8, "n"}});
+ Tensor tensor = Reduce("sum", {{16, "m"}}, Sum(), in, {{8, "n"}});
LoopNest l({tensor});
std::vector<ForPtr> loops = l.getLoopStmtsFor(tensor);
LoopNest::splitWithTail(loops[1], 2);
}
}
std::vector<float> out(16, -1.f);
- Tensor* tensor = Reduce("sum", {{16, "m"}}, Sum(), in, {{8, "n"}});
+ Tensor tensor = Reduce("sum", {{16, "m"}}, Sum(), in, {{8, "n"}});
LoopNest l({tensor});
std::vector<ForPtr> loops = l.getLoopStmtsFor(tensor);
LoopNest::splitWithTail(loops[0], 2);
Placeholder in(BufHandle("in", {1, 12, 6}, kFloat));
std::vector<float> in_(12 * 6, 1.f);
- Tensor* tensor_ = Reduce("sum", {{1, "k"}, {12, "n"}}, Sum(), in, {{6, "m"}});
+ Tensor tensor_ = Reduce("sum", {{1, "k"}, {12, "n"}}, Sum(), in, {{6, "m"}});
LoopNest l_({tensor_});
l_.prepareForCodegen();
StmtPtr s_ = Stmt::clone(l_.root_stmt());
s_ = IRSimplifier::simplify(s_);
- Tensor* tensor = Reduce("sum", {{1, "k"}, {12, "n"}}, Sum(), in, {{6, "m"}});
+ Tensor tensor = Reduce("sum", {{1, "k"}, {12, "n"}}, Sum(), in, {{6, "m"}});
LoopNest l({tensor});
auto loops = l.getLoopStmtsFor(tensor);
std::vector<float> out(1, -1.f);
- Tensor* c = Reduce("sum", {}, Sum(), b, {{m, "m"}, {n, "n"}});
+ Tensor c = Reduce("sum", {}, Sum(), b, {{m, "m"}, {n, "n"}});
LoopNest loop({c});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(c);
- auto c_body = loop.getAllWritesToBuf(c->buf())[1];
+ auto c_body = loop.getAllWritesToBuf(c.buf())[1];
ASSERT_TRUE(loop.rfactor(c_body, loops.at(0)));
auto rc = NodeFinder<ReduceOp>::find(loop.root_stmt());
ASSERT_EQ(rc.size(), 2);
std::vector<float> out(1, -1.f);
- Tensor* c = Reduce("sum", {}, Sum(), b, {{m, "m"}, {n, "n"}, {k, "k"}});
+ Tensor c = Reduce("sum", {}, Sum(), b, {{m, "m"}, {n, "n"}, {k, "k"}});
LoopNest loop({c});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(c);
- auto c_body = loop.getAllWritesToBuf(c->buf())[1];
+ auto c_body = loop.getAllWritesToBuf(c.buf())[1];
ASSERT_FALSE(loop.rfactor(c_body, loops.at(2)));
auto rc = NodeFinder<ReduceOp>::find(loop.root_stmt());
ASSERT_EQ(rc.size(), 1);
std::vector<float> out(1, -1.f);
- Tensor* c = Reduce("sum", {}, Sum(), b, {{m, "m"}, {n, "n"}, {k, "k"}});
+ Tensor c = Reduce("sum", {}, Sum(), b, {{m, "m"}, {n, "n"}, {k, "k"}});
LoopNest loop({c});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(c);
- auto c_body = loop.getAllWritesToBuf(c->buf())[1];
+ auto c_body = loop.getAllWritesToBuf(c.buf())[1];
ASSERT_TRUE(loop.rfactor(c_body, loops.at(0)));
auto rc = NodeFinder<ReduceOp>::find(loop.root_stmt());
ASSERT_EQ(rc.size(), 2);
std::vector<float> out(1, -1.f);
std::vector<float> ref(1, -1.f);
- Tensor* c = Reduce(
+ Tensor c = Reduce(
"sum",
{},
Sum(),
IRSimplifier::simplify(refloop.root_stmt()), {in_, c});
ref_cg.call({in, ref});
- BufPtr tmp_buf = c->buf();
+ BufPtr tmp_buf = c.buf();
for (int idx = 0; idx < rfac_number; idx++) {
auto reduce = loop.getAllWritesToBuf(tmp_buf)[1];
for (int i = 0; i < 3; ++i) {
std::vector<float> out(M, -1.f);
- Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
+ Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
LoopNest loop({c});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(c);
LoopNest::splitWithTail(loops[i], 8);
for (int i = 0; i < 3; ++i) {
std::vector<float> out(M, -1.f);
- Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
+ Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
LoopNest loop({c});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(c);
LoopNest::splitWithTail(loops[i], 5);
for (int i = 0; i < 3; ++i) {
std::vector<float> out(M, -1.f);
- Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
+ Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
LoopNest loop({c});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(c);
LoopNest::splitWithTail(loops[i], 16);
for (int i = 0; i < 3; ++i) {
std::vector<float> out(M, -1.f);
- Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
+ Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
LoopNest loop({c});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(c);
LoopNest::splitWithMask(loops[i], 8);
for (int i = 0; i < 3; ++i) {
std::vector<float> out(M, -1.f);
- Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
+ Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
LoopNest loop({c});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(c);
LoopNest::splitWithMask(loops[i], 5);
for (int i = 0; i < 3; ++i) {
std::vector<float> out(M, -1.f);
- Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
+ Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
LoopNest loop({c});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(c);
LoopNest::splitWithMask(loops[i], 16);
std::vector<float> out(M, -1.f);
- Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
+ Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}});
LoopNest loop({c});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(c);
LoopNest::splitWithTail(loops[2], SPLIT_FACTOR);
- auto c_body = loop.getAllWritesToBuf(c->buf())[2];
- auto all_loops = loop.getAllLoopNestsWritingToBuf(c->buf());
+ auto c_body = loop.getAllWritesToBuf(c.buf())[2];
+ auto all_loops = loop.getAllLoopNestsWritingToBuf(c.buf());
ASSERT_TRUE(all_loops.size() == 3 && all_loops.at(2).size() == 3);
LoopNest::reorderAxis(all_loops[2][1], all_loops[2][2]);
- all_loops = loop.getAllLoopNestsWritingToBuf(c->buf());
+ all_loops = loop.getAllLoopNestsWritingToBuf(c.buf());
ASSERT_TRUE(all_loops.size() == 3 && all_loops.at(2).size() == 3);
ASSERT_TRUE(loop.rfactor(c_body, all_loops[2][1]));
loop.prepareForCodegen();
std::vector<float> out(1, -1.f);
- Tensor* c = Reduce("sum", {}, Sum(), b, {{N, "n"}, {K, "k"}});
+ Tensor c = Reduce("sum", {}, Sum(), b, {{N, "n"}, {K, "k"}});
LoopNest loop({c});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(c);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr i, t;
LoopNest::splitWithTail(loops[1], SPLIT_FACTOR, &i, &t);
LoopNest::reorderAxis(loops[0], i);
- auto all_loops = loop.getAllLoopNestsWritingToBuf(c->buf());
+ auto all_loops = loop.getAllLoopNestsWritingToBuf(c.buf());
ASSERT_TRUE(all_loops.size() == 3 && all_loops.at(1).size() == 3);
- auto c_body = loop.getAllWritesToBuf(c->buf())[1];
+ auto c_body = loop.getAllWritesToBuf(c.buf())[1];
ASSERT_TRUE(loop.rfactor(c_body, all_loops[1][0]));
LoopNest::reorderAxis(all_loops[1][0], all_loops[1][2]);
Placeholder a_buf("a", kFloat, {M});
Placeholder b_buf("b", kFloat, {M, N, K});
- Tensor* x = Reduce("x", {{M, "m1"}}, Sum(), b_buf, {{N, "n1"}, {K, "k1"}});
- Tensor* y = Compute("y", {{M, "m2"}}, [&](const VarHandle& m) {
- return a_buf.load(m) + x->load(m);
+ Tensor x = Reduce("x", {{M, "m1"}}, Sum(), b_buf, {{N, "n1"}, {K, "k1"}});
+ Tensor y = Compute("y", {{M, "m2"}}, [&](const VarHandle& m) {
+ return a_buf.load(m) + x.load(m);
});
PaddedBuffer<float> a_v(M);
}
}
- LoopNest l1(std::vector<Tensor*>({y}), {x, y});
+ LoopNest l1({y}, {x, y});
// Cannot inline a reduction computation
- ASSERT_FALSE(l1.computeInline(x->buf()));
+ ASSERT_FALSE(l1.computeInline(x.buf()));
}
TEST(Reductions, ReduceInlineConsumer) {
Placeholder a_buf("a", kFloat, {M, N, K});
Placeholder b_buf("b", kFloat, {M, N, K});
- Tensor* x = Compute(
+ Tensor x = Compute(
"x",
{{M, "m1"}, {N, "n1"}, {K, "k1"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
return a_buf.load(m, n, k) + b_buf.load(m, n, k);
});
- Tensor* y = Reduce("y", {{M, "m2"}}, Sum(), x, {{N, "n2"}, {K, "k2"}});
+ Tensor y = Reduce("y", {{M, "m2"}}, Sum(), x, {{N, "n2"}, {K, "k2"}});
PaddedBuffer<float> a_v(M, N, K);
PaddedBuffer<float> b_v(M, N, K);
}
}
- LoopNest l1(std::vector<Tensor*>({y}), {x, y});
+ LoopNest l1({y}, {x, y});
LoopNest l2(l1);
- l2.computeInline(x->buf());
+ l2.computeInline(x.buf());
l1.prepareForCodegen();
l2.prepareForCodegen();
Placeholder a_buf("a", kFloat, {M, N, K});
Placeholder b_buf("b", kFloat, {M, N, K});
- Tensor* x = Compute(
+ Tensor x = Compute(
"x",
{{M, "m1"}, {N, "n1"}, {K, "k1"}},
[&](const VarHandle& m, const VarHandle& n, const VarHandle& k) {
Reducer minimum(ExprHandle(0.f), [&](ExprHandle a, ExprHandle b) {
return Add::make(ExprHandle(1.f), Min::make(a, b, false));
});
- Tensor* y = Reduce("y", {{M, "m2"}}, minimum, x, {{N, "n2"}, {K, "k2"}});
+ Tensor y = Reduce("y", {{M, "m2"}}, minimum, x, {{N, "n2"}, {K, "k2"}});
PaddedBuffer<float> a_v(M, N, K);
PaddedBuffer<float> b_v(M, N, K);
}
}
- LoopNest l1(std::vector<Tensor*>({y}), {x, y});
+ LoopNest l1({y}, {x, y});
LoopNest l2(l1);
- l2.computeInline(x->buf());
+ l2.computeInline(x.buf());
l1.prepareForCodegen();
l2.prepareForCodegen();
Placeholder a(BufHandle("a", {L, N, M}, kFloat));
Placeholder b(BufHandle("b", {L, N, M}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"scale",
{{L, "l2"}, {N, "n1"}, {M, "m1"}},
[&](const VarHandle& l, const VarHandle& n, const VarHandle& m) {
return b.load(l, n, m) * a.load(l, n, m);
});
- Tensor* d = Reduce("sum", {{L, "l1"}}, Sum(), c, {{N, "n1"}, {M, "m1"}});
+ Tensor d = Reduce("sum", {{L, "l1"}}, Sum(), c, {{N, "n1"}, {M, "m1"}});
- Tensor* e = Compute("scale", {{L, "l"}}, [&](const VarHandle& l) {
- return b.load(0, 0, l) * d->load(l);
+ Tensor e = Compute("scale", {{L, "l"}}, [&](const VarHandle& l) {
+ return b.load(0, 0, l) * d.load(l);
});
- LoopNest l(std::vector<Tensor*>({e}), {c, d, e});
+ LoopNest l({e}, {c, d, e});
LoopNest l_before(l);
l_before.prepareForCodegen();
SimpleIREvaluator cg_before(l_before.root_stmt(), {a, b, e});
StmtPtr d_loop = l.getLoopStmtsFor(d)[0];
- l.cacheAccesses(d->buf(), "d_local", d_loop);
+ l.cacheAccesses(d.buf(), "d_local", d_loop);
l.prepareForCodegen();
StmtPtr result = IRSimplifier::simplify(l.root_stmt());
Placeholder a(BufHandle("a", {L, N, M}, kFloat));
Placeholder b(BufHandle("b", {L, N, M}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"scale",
{{L, "l2"}, {N, "n1"}, {M, "m1"}},
[&](const VarHandle& l, const VarHandle& n, const VarHandle& m) {
return b.load(l, n, m) * a.load(l, n, m);
});
- Tensor* d = Reduce("sum", {{L, "l1"}}, Sum(), c, {{N, "n1"}, {M, "m1"}});
+ Tensor d = Reduce("sum", {{L, "l1"}}, Sum(), c, {{N, "n1"}, {M, "m1"}});
- Tensor* e = Compute("scale", {{L, "l"}}, [&](const VarHandle& l) {
- return b.load(0, 0, l) * d->load(l);
+ Tensor e = Compute("scale", {{L, "l"}}, [&](const VarHandle& l) {
+ return b.load(0, 0, l) * d.load(l);
});
- LoopNest l(std::vector<Tensor*>({e}), {c, d, e});
+ LoopNest l({e}, {c, d, e});
LoopNest l_before(l);
l_before.prepareForCodegen();
SimpleIREvaluator cg_before(l_before.root_stmt(), {a, b, e});
StmtPtr d_loop = l.getLoopStmtsFor(d)[1];
- l.cacheAccesses(d->buf(), "d_local", d_loop);
+ l.cacheAccesses(d.buf(), "d_local", d_loop);
l.prepareForCodegen();
StmtPtr result = IRSimplifier::simplify(l.root_stmt());
Placeholder a(BufHandle("a", {L, N, M}, kFloat));
Placeholder b(BufHandle("b", {L, N, M}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"scale",
{{L, "l2"}, {N, "n1"}, {M, "m1"}},
[&](const VarHandle& l, const VarHandle& n, const VarHandle& m) {
return b.load(l, n, m) * a.load(l, n, m);
});
- Tensor* d = Reduce("sum", {{L, "l1"}}, Sum(), c, {{N, "n1"}, {M, "m1"}});
+ Tensor d = Reduce("sum", {{L, "l1"}}, Sum(), c, {{N, "n1"}, {M, "m1"}});
- Tensor* e = Compute("scale", {{L, "l"}}, [&](const VarHandle& l) {
- return b.load(0, 0, l) * d->load(l);
+ Tensor e = Compute("scale", {{L, "l"}}, [&](const VarHandle& l) {
+ return b.load(0, 0, l) * d.load(l);
});
- LoopNest l(std::vector<Tensor*>({e}), {c, d, e});
+ LoopNest l({e}, {c, d, e});
LoopNest l_before(l);
l_before.prepareForCodegen();
SimpleIREvaluator cg_before(l_before.root_stmt(), {a, b, e});
StmtPtr d_loop = l.getLoopStmtsFor(d)[2];
- l.cacheAccesses(d->buf(), "d_local", d_loop);
+ l.cacheAccesses(d.buf(), "d_local", d_loop);
l.prepareForCodegen();
StmtPtr result = IRSimplifier::simplify(l.root_stmt());
Placeholder a(BufHandle("a", {24, 32, 12}, kFloat));
Placeholder b(BufHandle("b", {24, 32, 12}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"scale",
{{24, "l2"}, {32, "n1"}, {12, "m1"}},
[&](const VarHandle& l, const VarHandle& n, const VarHandle& m) {
return b.load(l, n, m) * a.load(l, n, m);
});
- Tensor* d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}});
+ Tensor d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}});
- Tensor* e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) {
- return b.load(0, 0, l) * d->load(l);
+ Tensor e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) {
+ return b.load(0, 0, l) * d.load(l);
});
- LoopNest l(std::vector<Tensor*>({e}), {c, d, e});
+ LoopNest l({e}, {c, d, e});
StmtPtr d_loop = l.getLoopStmtsFor(d)[1];
- l.cacheAccesses(c->buf(), "scale_local", d_loop);
+ l.cacheAccesses(c.buf(), "scale_local", d_loop);
l.prepareForCodegen();
StmtPtr result = IRSimplifier::simplify(l.root_stmt());
Placeholder a(BufHandle("a", {24, 32, 12}, kFloat));
Placeholder b(BufHandle("b", {24, 32, 12}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"scale",
{{24, "l2"}, {32, "n1"}, {12, "m1"}},
[&](const VarHandle& l, const VarHandle& n, const VarHandle& m) {
return b.load(l, n, m) * a.load(l, n, m);
});
- Tensor* d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}});
+ Tensor d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}});
- Tensor* e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) {
- return b.load(0, 0, l) * d->load(l);
+ Tensor e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) {
+ return b.load(0, 0, l) * d.load(l);
});
- LoopNest l(std::vector<Tensor*>({e}), {c, d, e});
+ LoopNest l({e}, {c, d, e});
LoopNest::splitWithMask(l.getLoopStmtsFor(e)[0], 4);
StmtPtr e_loop = l.getLoopStmtsFor(e)[1];
- l.cacheAccesses(d->buf(), "sum_local", e_loop);
+ l.cacheAccesses(d.buf(), "sum_local", e_loop);
l.prepareForCodegen();
StmtPtr result = IRSimplifier::simplify(l.root_stmt());
Placeholder a(BufHandle("a", {24, 32, 12}, kFloat));
Placeholder b(BufHandle("b", {24, 32, 12}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"scale",
{{24, "l2"}, {32, "n1"}, {12, "m1"}},
[&](const VarHandle& l, const VarHandle& n, const VarHandle& m) {
return b.load(l, n, m) * a.load(l, n, m);
});
- Tensor* d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}});
+ Tensor d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}});
- Tensor* e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) {
- return b.load(0, 0, l) * d->load(l);
+ Tensor e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) {
+ return b.load(0, 0, l) * d.load(l);
});
- LoopNest l(std::vector<Tensor*>({e}), {c, d, e});
+ LoopNest l({e}, {c, d, e});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr inner;
// Split reduction consumer.
LoopNest::splitWithMask(l.getLoopStmtsFor(e)[0], 4, &inner);
- l.cacheAccesses(d->buf(), "sum_local", inner);
+ l.cacheAccesses(d.buf(), "sum_local", inner);
l.prepareForCodegen();
StmtPtr result = IRSimplifier::simplify(l.root_stmt());
Placeholder a(BufHandle("a", {24, 32, 12}, kFloat));
Placeholder b(BufHandle("b", {24, 32, 12}, kFloat));
- Tensor* c = Compute(
+ Tensor c = Compute(
"scale",
{{24, "l2"}, {32, "n1"}, {12, "m1"}},
[&](const VarHandle& l, const VarHandle& n, const VarHandle& m) {
return b.load(l, n, m) * a.load(l, n, m);
});
- Tensor* d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}});
+ Tensor d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}});
- Tensor* e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) {
- return b.load(0, 0, l) * d->load(l);
+ Tensor e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) {
+ return b.load(0, 0, l) * d.load(l);
});
- LoopNest l(std::vector<Tensor*>({e}), {c, d, e});
+ LoopNest l({e}, {c, d, e});
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr inner;
// Split reduction consumer.
LoopNest::splitWithMask(l.getLoopStmtsFor(e)[0], 4, &inner);
- l.cacheAccesses(d->buf(), "sum_local", inner);
+ l.cacheAccesses(d.buf(), "sum_local", inner);
l.prepareForCodegen();
StmtPtr result = IRSimplifier::simplify(l.root_stmt());
std::vector<float> out(1, -1.f);
- Tensor* c = Reduce("sum", {}, Sum(), b, {{m, "a"}, {n, "b"}, {k, "c"}});
+ Tensor c = Reduce("sum", {}, Sum(), b, {{m, "a"}, {n, "b"}, {k, "c"}});
LoopNest loop({c});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(c);
LoopNest::reorderAxis(loops.at(0), loops.at(1));
loops = loop.getLoopStmtsFor(c);
- auto c_body = loop.getAllWritesToBuf(c->buf())[1];
+ auto c_body = loop.getAllWritesToBuf(c.buf())[1];
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
BufPtr rfac_buf;
ASSERT_TRUE(loop.rfactor(c_body, loops.at(0), &rfac_buf));
std::vector<float> out(1, -1.f);
- Tensor* c = Reduce("sum", {}, Sum(), b, {{m, "a"}, {n, "b"}, {k, "c"}});
+ Tensor c = Reduce("sum", {}, Sum(), b, {{m, "a"}, {n, "b"}, {k, "c"}});
LoopNest loop({c});
std::vector<ForPtr> loops = loop.getLoopStmtsFor(c);
- auto c_body = loop.getAllWritesToBuf(c->buf())[1];
+ auto c_body = loop.getAllWritesToBuf(c.buf())[1];
LoopNest::reorderAxis(loops.at(0), loops.at(1));
loops = loop.getLoopStmtsFor(c);
Placeholder in(BufHandle("in", {8, 8}, kFloat));
- Tensor* tensor = Reduce("sum", {{8, "m"}}, Sum(), in, {{8, "n"}});
+ Tensor tensor = Reduce("sum", {{8, "m"}}, Sum(), in, {{8, "n"}});
LoopNest l_before({tensor});
LoopNest l(l_before);
l_before.prepareForCodegen();
Placeholder in(BufHandle("in", {8, 8}, kFloat));
- Tensor* tensor = Reduce("sum", {{8, "m"}}, Sum(), in, {{8, "n"}});
+ Tensor tensor = Reduce("sum", {{8, "m"}}, Sum(), in, {{8, "n"}});
LoopNest l({tensor});
ASSERT_FALSE(LoopNest::vectorize(l.getLoopStmtsFor(tensor)[1]));
Placeholder in(BufHandle("in", {8, 8}, kFloat));
- Tensor* tensor = Reduce("sum", {}, Sum(), in, {{8, "m"}, {8, "n"}});
+ Tensor tensor = Reduce("sum", {}, Sum(), in, {{8, "m"}, {8, "n"}});
LoopNest l_before({tensor});
LoopNest l(l_before);
std::vector<ForPtr> loops = l.getLoopStmtsFor(tensor);
LoopNest::reorderAxis(loops[0], loops[1]);
loops = l.getLoopStmtsFor(tensor);
- auto tensor_body = l.getAllWritesToBuf(tensor->buf())[1];
+ auto tensor_body = l.getAllWritesToBuf(tensor.buf())[1];
BufPtr rfac_buf = nullptr;
ASSERT_TRUE(LoopNest::rfactor(tensor_body, loops.at(0), &rfac_buf));
constexpr int N = 16;
Placeholder A("A", kFloat, {M, N});
Placeholder B("B", kFloat, {N});
- Tensor* C = Reduce(
+ Tensor C = Reduce(
"C",
{{N, "n"}},
Sum(),
{
Placeholder a("a", kFloat, {1, 12, 1});
VarHandle x("x", kInt);
- Tensor* b = Compute(
- // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
+ Tensor b = Compute(
"x",
{{1, "i"}, {12, "m"}, {1, "n"}},
ExprPtr body = alloc<Mul>(i, j);
// Finally, we pass all these pieces together to Tensor constructor:
- Tensor* X = new Tensor(buf, args, body);
- std::cout << "Tensor computation: " << *X << std::endl;
+ Tensor X = Tensor(buf, args, body);
+ std::cout << "Tensor computation: " << X << std::endl;
// Prints:
// Tensor computation: Tensor X[64, 32]:
// for (int i = 0; i < 64; i++) {
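// For reference, a minimal sketch of how such pieces can be built by
// hand. The index vars, dims, and dtype below are illustrative
// assumptions, not the exact code elided from this walkthrough:
VarPtr i_v = alloc<Var>("i", kInt);
VarPtr j_v = alloc<Var>("j", kInt);
std::vector<ExprPtr> dims = {alloc<IntImm>(64), alloc<IntImm>(32)};
std::vector<VarPtr> args_v = {i_v, j_v};
ExprPtr body_v = alloc<Mul>(i_v, j_v); // same shape of body as above
BufPtr buf_v = alloc<Buf>("X", dims, kInt);
Tensor X2(buf_v, args_v, body_v);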
// constructing Exprs, Tensors also have a more convenient API for
// construction. It is based on the Compute API, which takes a name,
// dimensions, and a lambda specifying the computation body:
- Tensor* Z = Compute(
+ Tensor Z = Compute(
"Z",
{{64, "i"}, {32, "j"}},
[](const VarHandle& i, const VarHandle& j) { return i / j; });
- std::cout << "Tensor computation: " << *Z << std::endl;
+ std::cout << "Tensor computation: " << Z << std::endl;
// Prints:
// Tensor computation: Tensor Z[64, 32]:
// for (int i = 0; i < 64; i++) {
// Tensors might access other tensors and external placeholders in their
// expressions. This can be done like so:
Placeholder P("P", kInt, {64, 32});
- Tensor* R = Compute(
+ Tensor R = Compute(
"R",
{{64, "i"}, {32, "j"}},
[&](const VarHandle& i, const VarHandle& j) {
- return Z->load(i, j) * P.load(i, j);
+ return Z.load(i, j) * P.load(i, j);
});
- std::cout << "Tensor computation: " << *R << std::endl;
+ std::cout << "Tensor computation: " << R << std::endl;
// Prints:
// Tensor computation: Tensor R[64, 32]:
// for (int i = 0; i < 64; i++) {
// Let's create a simple tensor expression and construct a loop nest for it.
Placeholder A("A", kFloat, {64, 32});
Placeholder B("B", kFloat, {64, 32});
- Tensor* X = Compute(
+ Tensor X = Compute(
"X",
{{64, "i"}, {32, "j"}},
[&](const VarHandle& i, const VarHandle& j) {
return A.load(i, j) + B.load(i, j);
});
- Tensor* Y = Compute(
+ Tensor Y = Compute(
"Y",
{{64, "i"}, {32, "j"}},
[&](const VarHandle& i, const VarHandle& j) {
- return sigmoid(X->load(i, j));
+ return sigmoid(X.load(i, j));
});
- std::cout << "Tensor computation X: " << *X
- << "Tensor computation Y: " << *Y << std::endl;
+ std::cout << "Tensor computation X: " << X << "Tensor computation Y: " << Y
+ << std::endl;
// Prints:
// Tensor computation X: Tensor X[64, 32]:
// for (int i = 0; i < 64; i++) {
// Creating a loop nest is quite simple: we just need to specify a list
// of output tensors and a list of all tensors to compute:
- std::vector<Tensor*> outputs = {Y};
- std::vector<Tensor*> all = {X, Y};
- LoopNest loopnest(outputs, all);
+ // NOLINTNEXTLINE(bugprone-argument-comment)
+ LoopNest loopnest(/*outputs=*/{Y}, /*all=*/{X, Y});
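// A quick way to inspect what we just built (a sketch; the printed IR
// is abbreviated here):
std::cout << "Loop nest root stmt: " << *loopnest.root_stmt() << std::endl;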
// The IR used in LoopNest is based on tensor statements, represented by
// the `Stmt` class. Statements are used to specify the loop nest structure, and
// Let's start by constructing a simple computation for us to work with:
Placeholder A("A", kInt, {64, 32});
Placeholder B("B", kInt, {64, 32});
- Tensor* X = Compute(
+ Tensor X = Compute(
"X",
{{64, "i"}, {32, "j"}},
[&](const VarHandle& i, const VarHandle& j) {
void optimizePointwise(
tensorexpr::LoopNest* ln,
- tensorexpr::Tensor* target,
+ tensorexpr::Tensor target,
int width) {
using namespace torch::jit::tensorexpr;
std::vector<ForPtr> loops = ln->getLoopStmtsFor(target);
std::shared_ptr<TEWrapper> wrapTECompute(
std::shared_ptr<TEWrapper> wrap,
tensorexpr::Placeholder& in,
- tensorexpr::Tensor* out,
+ tensorexpr::Tensor out,
tensorexpr::VarHandle& dim,
int width = kVectorWidth) {
using namespace torch::jit::tensorexpr;
std::shared_ptr<TEWrapper> wrapTECompute(
std::shared_ptr<TEWrapper> wrap,
tensorexpr::Placeholder& in,
- tensorexpr::Tensor* out,
+ tensorexpr::Tensor out,
tensorexpr::VarHandle& dim,
int width = kVectorWidth) {
return wrap;
auto wrap = std::make_shared<TEWrapper>();
auto N = VarHandle("N", kInt);
Placeholder A("A", kFloat, {N});
- tensorexpr::Tensor* B = Compute("B", {N}, [&](const VarHandle& i) {
+ tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) {
auto A_elem = [&]() {
if (!clamp) {
return A.load(i);
wrap = std::make_shared<TEWrapper>();
auto N = VarHandle("N", kInt);
Placeholder A("A", kFloat, {N});
- tensorexpr::Tensor* B = Compute("B", {N}, [&](const VarHandle& i) {
+ tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) {
auto zero = FloatImm::make(0.f);
auto a = A.load(i);
return ifThenElse(a < zero, zero, a);
wrap = std::make_shared<TEWrapper>();
auto N = VarHandle("N", kInt);
Placeholder A("A", kFloat, {N});
- tensorexpr::Tensor* B = Compute("B", {N}, [&](const VarHandle& i) {
+ tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) {
auto a = A.load(i);
return fast_tanh(a);
});
wrap = std::make_shared<TEWrapper>();
auto N = VarHandle("N", kInt);
Placeholder A("A", kFloat, {N});
- Tensor* B =
+ Tensor B =
Compute("B", {N}, [&](const VarHandle& i) { return sigmoid(A.load(i)); });
// NNC uses sleef for vectorizing sigmoid, which comes in an 8-wide flavor
// (Sleef_expf8).
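// A minimal scheduling sketch that matches that vector width. This is
// an illustrative assumption, not this file's exact code: it reuses the
// split-and-vectorize pattern shown earlier, applied to a LoopNest over B:
LoopNest ln({B});
std::vector<ForPtr> loops = ln.getLoopStmtsFor(B);
ForPtr inner, tail;
ln.splitWithTail(loops[0], 8, &inner, &tail);
ln.vectorize(inner);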
class CodeGen::BufferArg {
public:
BufferArg(const Placeholder& buffer) : buf_(buffer.data()) {}
- BufferArg(Tensor* tensor) : buf_(tensor->buf()) {}
+ BufferArg(Tensor tensor) : buf_(tensor.buf()) {}
BufferArg(const VarHandle& var) : var_(var.node()), isVar_(true) {}
BufferArg(const BufHandle& buf) : buf_(buf.node()) {}
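// Call sites are unaffected by the switch to a value-type Tensor: both
// Placeholder and Tensor still convert implicitly to BufferArg. A
// hypothetical sketch (assumes `stmt` is a lowered statement, `A` a
// Placeholder, and `B` a Tensor):
SimpleIREvaluator cg(stmt, {A, B});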
}
std::ostream& operator<<(std::ostream& stream, const Tensor& t) {
- stream << std::to_string(&t);
+ stream << std::to_string(t);
return stream;
}
}
}
-void print(const Tensor* t) {
+void print(const Tensor& t) {
std::cout << std::to_string(t);
}
return oss.str();
}
-std::string to_string(const Tensor* t) {
- if (!t) {
- return "(null tensor)\n";
- }
+std::string to_string(const Tensor& t) {
std::ostringstream oss;
// TODO: move this to Buf printer
- oss << "Tensor " << t->buf()->name_hint() << "[";
- for (const auto i : c10::irange(t->buf()->ndim())) {
+ oss << "Tensor " << t.buf()->name_hint() << "[";
+ for (const auto i : c10::irange(t.buf()->ndim())) {
if (i != 0) {
oss << ", ";
}
- oss << *t->buf()->dim(i);
+ oss << *t.buf()->dim(i);
}
- oss << "]:\n" << *t->stmt() << "\n";
+ oss << "]:\n" << *t.stmt() << "\n";
return oss.str();
}
} // namespace std
TORCH_API void print(ExprPtr expr);
TORCH_API void print(StmtPtr stmt);
-TORCH_API void print(const Tensor* t);
+TORCH_API void print(const Tensor& t);
} // namespace tensorexpr
} // namespace jit
TORCH_API std::string to_string(ExprPtr expr);
TORCH_API std::string to_string(StmtPtr stmt);
-TORCH_API std::string to_string(const Tensor* t);
+TORCH_API std::string to_string(const Tensor& t);
} // namespace std
return res.first;
}
-Tensor* computeOneOperand(
+Tensor computeOneOperand(
const std::string& name,
const std::vector<ArgValue>& inputValues,
const std::vector<ExprHandle>& outputShape,
});
}
-Tensor* computeTwoOperand(
+Tensor computeTwoOperand(
const std::string& name,
const std::vector<ArgValue>& inputValues,
const std::vector<ExprHandle>& outputShape,
});
}
-Tensor* computeTwoOperandWithAlpha(
+Tensor computeTwoOperandWithAlpha(
const std::string& name,
const std::vector<ArgValue>& inputValues,
const std::vector<ExprHandle>& outputShape,
});
}
-Tensor* computeConditionWithTwoOperand(
+Tensor computeConditionWithTwoOperand(
const std::string& name,
const std::vector<ArgValue>& inputValues,
const std::vector<ExprHandle>& outputShape,
});
}
-Tensor* computeThreeOperand(
+Tensor computeThreeOperand(
const std::string& name,
const std::vector<ArgValue>& inputValues,
const std::vector<ExprHandle>& outputShape,
return demoteOutput(compute, outputType);
});
}
-Tensor* computeFourOperand(
+Tensor computeFourOperand(
const std::string& name,
const std::vector<ArgValue>& inputValues,
const std::vector<ExprHandle>& outputShape,
}
return {highType, nonEmptyInputs};
}
-Tensor* computeCatWoConditionals(
+Tensor computeCatWoConditionals(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape) {
// NOLINTNEXTLINE(performance-unnecessary-copy-initialization)
auto output_buf =
alloc<Buf>("aten_cat", output_sizes_expr, ToDtype(high_type));
if (non_empty_inputs.size() == 0) {
- return new Tensor(
+ return Tensor(
output_buf, alloc<tensorexpr::Block>(std::vector<StmtPtr>({})));
}
concat_dim_size =
alloc<Add>(concat_dim_size, input_dims[norm_concat_dim].node());
}
- return new Tensor(output_buf, IRSimplifier::simplify(block));
+ return Tensor(output_buf, IRSimplifier::simplify(block));
}
-Tensor* computeCat(
+Tensor computeCat(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
at::Device device) {
});
}
-Tensor* computeConv2d(
+Tensor computeConv2d(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
const c10::optional<ScalarType>& outputType) {
dilation[0],
dilation[1],
groups});
- return new Tensor(ResultBuf.node(), s);
+ return Tensor(ResultBuf.node(), s);
}
-Tensor* tensorexpr::computeOperandValue(
+Tensor tensorexpr::computeOperandValue(
c10::Symbol op,
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
return c10::nullopt;
}
-Tensor* TensorExprKernel::computeValue(const torch::jit::Value* v) {
+Tensor TensorExprKernel::computeValue(const torch::jit::Value* v) {
auto inputs = v->node()->inputs();
auto op = v->node()->kind();
return dims;
}
-Tensor* TensorExprKernel::bindInput(const torch::jit::Value* input) {
+Tensor TensorExprKernel::bindInput(const torch::jit::Value* input) {
auto const& t = input->type();
- Tensor* result = nullptr;
+ Tensor result(nullptr, nullptr);
switch (t->kind()) {
case TypeKind::TensorType: {
auto tt = input->type()->cast<TensorType>();
}
return inBuffer.load(idx);
});
- bufs_.emplace(input, result->buf());
+ bufs_.emplace(input, result.buf());
bufferArgs_.emplace_back(inBuffer);
break;
return (strides == at::infer_dense_strides(sizes, strides));
}
-Tensor* TensorExprKernel::convertOutputToCorrectStrides(torch::jit::Value* v) {
+Tensor TensorExprKernel::convertOutputToCorrectStrides(torch::jit::Value* v) {
const TensorTypePtr& tt = v->type()->expect<TensorType>();
TORCH_INTERNAL_ASSERT(bufs_.count(v));
BufPtr buf = bufs_.at(v);
auto sizes = *tt->sizes().concrete_sizes();
std::vector<int64_t> default_strides = TensorType::contiguousStridesOf(sizes);
if (!tt->strides().concrete_sizes()) {
- return new Tensor(buf, nullptr);
+ return Tensor(buf, nullptr);
}
TORCH_INTERNAL_ASSERT(tt->strides().concrete_sizes());
const std::vector<int64_t> strides = *tt->strides().concrete_sizes();
// All Tensors in NNC are laid out in the default, contiguous layout.
// If the output is also default-contiguous, we don't need to do anything.
if (strides == default_strides) {
- return new Tensor(buf, nullptr);
+ return Tensor(buf, nullptr);
}
// If the tensor is not dense or overlaps, we have
// no way of matching the profiled striding
if (!denseAndNonOverlapping(sizes, strides)) {
- return new Tensor(buf, nullptr);
+ return Tensor(buf, nullptr);
}
auto dims = c10::fmap<DimArg>(sizesForValue(v));
nInputs_ = graph_->inputs().size();
genInputDebugNames();
for (auto const& input : graph_->inputs()) {
- if (Tensor* t = bindInput(input)) {
- block->append_stmt(t->stmt());
+ Tensor t = bindInput(input);
+ if (t.stmt()) {
+ block->append_stmt(t.stmt());
}
}
} else {
for (auto const& output : n->outputs()) {
if (output->hasUses()) {
- Tensor* t = computeValue(output);
- bufs_.emplace(output, t->buf());
- // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
- block->append_stmt(t->stmt());
+ Tensor t = computeValue(output);
+ bufs_.emplace(output, t.buf());
+ block->append_stmt(t.stmt());
}
}
}
// The "strided" tensor will be incorrect if used in NNC,
// since NNC views it as contiguous. Only convert it to the right
// strides at the end of the kernel (if already contiguous it's a no-op)
- Tensor* properly_strided_output = convertOutputToCorrectStrides(output);
- if (properly_strided_output->stmt()) {
- block->append_stmt(properly_strided_output->stmt());
+ Tensor properly_strided_output = convertOutputToCorrectStrides(output);
+ if (properly_strided_output.stmt()) {
+ block->append_stmt(properly_strided_output.stmt());
}
- // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
- bufs_[output] = properly_strided_output->buf();
+ bufs_[output] = properly_strided_output.buf();
const auto& tt = output->type()->expect<TensorType>();
auto sizes = *tt->sizes().concrete_sizes();
tensorOutputSizes_.push_back(sizes);
IntList,
ArgNone>;
-using NNCLoweringFunction = std::function<Tensor*(
+using NNCLoweringFunction = std::function<Tensor(
const std::vector<ArgValue>&,
const std::vector<ExprHandle>&,
const c10::optional<ScalarType>&,
c10::ScalarType dtype;
};
-TORCH_API Tensor* computeOperandValue(
+TORCH_API Tensor computeOperandValue(
c10::Symbol op,
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
const torch::jit::Value* v,
const std::vector<ExprHandle>& axes);
- Tensor* computeValue(const torch::jit::Value* v);
+ Tensor computeValue(const torch::jit::Value* v);
void bindConstant(const torch::jit::Value* v);
std::vector<at::Tensor>& outputs);
BackendType inferBackendTypeFromDevice(at::Device device);
- Tensor* bindInput(const torch::jit::Value* input);
+ Tensor bindInput(const torch::jit::Value* input);
- Tensor* convertOutputToCorrectStrides(torch::jit::Value* v);
+ Tensor convertOutputToCorrectStrides(torch::jit::Value* v);
// Captures the information for reduction operation nodes.
struct ReductionInfo {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
LoopNest::LoopNest(
- const std::vector<Tensor*>& output_tensors,
- const std::vector<Tensor*>& tensors_to_compute) {
+ const std::vector<Tensor>& output_tensors,
+ const std::vector<Tensor>& tensors_to_compute) {
initialize(output_tensors, tensors_to_compute);
verify(root_stmt_);
}
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
-LoopNest::LoopNest(const std::vector<Tensor*>& output_tensors) {
+LoopNest::LoopNest(const std::vector<Tensor>& output_tensors) {
initialize(output_tensors, output_tensors);
verify(root_stmt_);
}
}
void LoopNest::initialize(
- const std::vector<Tensor*>& output_tensors,
- const std::vector<Tensor*>& tensors_to_compute) {
+ const std::vector<Tensor>& output_tensors,
+ const std::vector<Tensor>& tensors_to_compute) {
for (auto t : output_tensors) {
- output_bufs_.insert(t->buf());
+ output_bufs_.insert(t.buf());
}
std::vector<StmtPtr> loops;
- for (Tensor* t : tensors_to_compute) {
- StmtPtr loop = t->stmt();
+ for (Tensor t : tensors_to_compute) {
+ StmtPtr loop = t.stmt();
if (loop->get_parent()) {
std::cerr << "Error: creating a loopnest from already used Tensors\n";
loops = {};
}
}
-std::vector<ForPtr> LoopNest::getLoopStmtsFor(Tensor* t) const {
+std::vector<ForPtr> LoopNest::getLoopStmtsFor(Tensor t) const {
StmtPtr cur_stmt = getLoopBodyFor(t);
return getLoopStmtsFor(cur_stmt);
}
return result;
}
-StmtPtr LoopNest::getLoopBodyFor(Tensor* t) const {
- return getLoopBodyFor(t->buf());
+StmtPtr LoopNest::getLoopBodyFor(Tensor t) const {
+ return getLoopBodyFor(t.buf());
}
StmtPtr LoopNest::getLoopBodyFor(BufPtr buf) const {
public:
// A constructor for building a LoopNest from a list of Tensors
LoopNest(
- const std::vector<Tensor*>& output_tensors,
- const std::vector<Tensor*>& tensors_to_compute);
+ const std::vector<Tensor>& output_tensors,
+ const std::vector<Tensor>& tensors_to_compute);
// A convenience constructor for the case when all tensors are output tensors
- LoopNest(const std::vector<Tensor*>& output_tensors);
+ LoopNest(const std::vector<Tensor>& output_tensors);
// A constructor for building a LoopNest from an Stmt and a list of output
// buffers.
return root_stmt_;
}
- std::vector<ForPtr> getLoopStmtsFor(Tensor*) const;
+ std::vector<ForPtr> getLoopStmtsFor(Tensor) const;
std::vector<ForPtr> getLoopStmtsFor(BufPtr) const;
std::vector<ForPtr> getLoopStmtsFor(StmtPtr) const;
- StmtPtr getLoopBodyFor(Tensor*) const;
+ StmtPtr getLoopBodyFor(Tensor) const;
StmtPtr getLoopBodyFor(BufPtr) const;
// Returns the For stmt indexed by 'indices' in the 'root' For stmt.
private:
void initialize(
- const std::vector<Tensor*>& output_tensors,
- const std::vector<Tensor*>& tensors_to_compute);
+ const std::vector<Tensor>& output_tensors,
+ const std::vector<Tensor>& tensors_to_compute);
StmtPtr insertAllocFree(StmtPtr stmt);
const std::unordered_set<BufPtr> getIntermediateBufs() const;
using InitFunc = std::function<ExprHandle(const std::vector<VarHandle>&)>;
-Tensor* conv2d_depthwise_static(
+Tensor conv2d_depthwise_static(
BufHandle input,
BufHandle weight,
const InitFunc& init_func,
auto OH = (H - R + 2 * pad) / stride + 1;
auto OW = (W - S + 2 * pad) / stride + 1;
- Tensor* conv = Reduce(
+ Tensor conv = Reduce(
"conv2d_depthwise",
{{N, "n"}, {K, "k"}, {OH, "oh"}, {OW, "ow"}},
Sum(),
} else if (R == 3 && stride == 1 && pad == 1) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ForPtr main, peeled;
- auto loops = nest.getAllLoopNestsWritingToBuf(conv->buf());
+ auto loops = nest.getAllLoopNestsWritingToBuf(conv.buf());
main = loops[1][kLoopW];
nest.sliceHead(main, 1, &peeled, &main);
nest.sliceTail(main, 1, &main, &peeled);
nest.sliceTail(main, 1, &main, &peeled);
}
- return new Tensor(conv->buf(), nest.root_stmt());
+ return Tensor(conv.buf(), nest.root_stmt());
}
-Tensor* conv2d_depthwise_dynamic(
+Tensor conv2d_depthwise_dynamic(
BufHandle input,
BufHandle weight,
const InitFunc& init_func,
} // namespace
-Tensor* conv2d_depthwise(
+Tensor conv2d_depthwise(
BufHandle input,
BufHandle weight,
BufHandle bias,
return conv2d_depthwise_static(input, weight, init_func, stride, pad, groups);
}
-Tensor* conv2d_depthwise(
+Tensor conv2d_depthwise(
BufHandle input,
BufHandle weight,
int stride,
return conv2d_depthwise_static(input, weight, init_func, stride, pad, groups);
}
-Tensor* conv2d_depthwise(
+Tensor conv2d_depthwise(
BufHandle input,
BufHandle weight,
BufHandle bias,
groups);
}
-Tensor* conv2d_depthwise(
+Tensor conv2d_depthwise(
BufHandle input,
BufHandle weight,
ExprHandle N,
namespace tensorexpr {
// An API to compute 2D depthwise convolutions with bias.
-TORCH_API Tensor* conv2d_depthwise(
+TORCH_API Tensor conv2d_depthwise(
BufHandle input,
BufHandle weight,
BufHandle bias,
int groups);
// An API to compute 2D depthwise convolutions without bias.
-TORCH_API Tensor* conv2d_depthwise(
+TORCH_API Tensor conv2d_depthwise(
BufHandle input,
BufHandle weight,
int stride,
int pad,
int groups);
-TORCH_API Tensor* conv2d_depthwise(
+TORCH_API Tensor conv2d_depthwise(
BufHandle input,
BufHandle weight,
BufHandle bias,
ExprHandle pad,
ExprHandle groups);
-TORCH_API Tensor* conv2d_depthwise(
+TORCH_API Tensor conv2d_depthwise(
BufHandle input,
BufHandle weight,
ExprHandle N,
namespace jit {
namespace tensorexpr {
-Tensor* computeMatmul(
+Tensor computeMatmul(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
const c10::optional<ScalarType>& outputType) {
},
{{size_a[1], "K"}});
} else {
- return new Tensor(
+ return Tensor(
ResultBuf.node(),
ExternalCall::make(ResultBuf, "nnc_aten_matmul", {a, b}, {}));
}
}
-Tensor* computeAddMM(
+Tensor computeAddMM(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
const c10::optional<ScalarType>& outputType) {
dtype = Dtype(*outputType);
}
BufHandle ResultBuf("addmm", outputShape, dtype);
- return new Tensor(
+ return Tensor(
ResultBuf.node(),
ExternalCall::make(
ResultBuf,
namespace jit {
namespace tensorexpr {
-Tensor* computeMatmul(
+Tensor computeMatmul(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
const c10::optional<ScalarType>& outputType);
-Tensor* computeAddMM(
+Tensor computeAddMM(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
const c10::optional<ScalarType>& outputType);
namespace jit {
namespace tensorexpr {
-Tensor* computeBatchNorm(
+Tensor computeBatchNorm(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
const c10::optional<ScalarType>& outputType) {
namespace jit {
namespace tensorexpr {
-Tensor* computeBatchNorm(
+Tensor computeBatchNorm(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
const c10::optional<ScalarType>& outputType);
namespace jit {
namespace tensorexpr {
-Tensor* computeSum(
+Tensor computeSum(
const std::vector<ArgValue>& inputs,
const c10::optional<ScalarType>& outputType) {
std::vector<size_t> axes;
reductionDims);
}
-Tensor* computeMean(
+Tensor computeMean(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
const c10::optional<ScalarType>& outputType) {
mean_dims_expr.emplace_back(idx);
}
}
- return new Tensor(
+ return Tensor(
ResultBuf.node(),
ExternalCall::make(
ResultBuf, "nnc_aten_mean", {InputBuf}, mean_dims_expr));
}
-Tensor* computeAdaptiveAvgPool2d(
+Tensor computeAdaptiveAvgPool2d(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
const c10::optional<ScalarType>& outputType) {
BufHandle ResultBuf("adaptive_avgpool2d", outputShape, dtype);
// NOLINTNEXTLINE(performance-unnecessary-copy-initialization)
auto out_size_param = c10::get<IntList>(inputs[1]);
- return new Tensor(
+ return Tensor(
ResultBuf.node(),
ExternalCall::make(
ResultBuf,
namespace jit {
namespace tensorexpr {
-TORCH_API Tensor* computeSum(
+TORCH_API Tensor computeSum(
const std::vector<ArgValue>& inputs,
const c10::optional<ScalarType>& outputType);
-TORCH_API Tensor* computeMean(
+TORCH_API Tensor computeMean(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
const c10::optional<ScalarType>& outputType);
-TORCH_API Tensor* computeAdaptiveAvgPool2d(
+TORCH_API Tensor computeAdaptiveAvgPool2d(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
const c10::optional<ScalarType>& outputType);
using namespace torch::jit::tensorexpr;
-Tensor* computeSoftmax(
+Tensor computeSoftmax(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
bool log_softmax) {
Compute("aten_softmax_exp", output_dims, [&](ParameterList& indices) {
auto inp = tensorOrConstant(
inputs[0], convert_indices_to_expr_handle(indices));
- return exp(inp - max->load(remove_softmax_dim_index(indices)));
+ return exp(inp - max.load(remove_softmax_dim_index(indices)));
});
auto sum = Reduce(
"aten_softmax_sum",
non_softmax_dims,
Sum(),
[&](ParameterList& indices) {
- return e->load(move_softmax_dim_index_to_pos(indices));
+ return e.load(move_softmax_dim_index_to_pos(indices));
},
{output_dims[softmax_dim]});
if (!log_softmax) {
auto result =
Compute("aten_softmax", output_dims, [&](ParameterList& indices) {
- return e->load(indices) /
- sum->load(remove_softmax_dim_index(indices));
+ return e.load(indices) / sum.load(remove_softmax_dim_index(indices));
});
- return new Tensor(
- result->buf(),
+ return Tensor(
+ result.buf(),
alloc<tensorexpr::Block>(std::vector<StmtPtr>(
- {max->stmt(), e->stmt(), sum->stmt(), result->stmt()})));
+ {max.stmt(), e.stmt(), sum.stmt(), result.stmt()})));
}
auto log_sum = Compute(
"aten_softmax_log_sum", non_softmax_dims, [&](ParameterList& indices) {
- return log(sum->load(indices));
+ return log(sum.load(indices));
});
auto result =
Compute("aten_log_softmax", output_dims, [&](ParameterList& indices) {
auto inp = tensorOrConstant(
inputs[0], convert_indices_to_expr_handle(indices));
auto non_softmax_indices = remove_softmax_dim_index(indices);
- return inp - max->load(non_softmax_indices) -
- log_sum->load(non_softmax_indices);
+ return inp - max.load(non_softmax_indices) -
+ log_sum.load(non_softmax_indices);
});
- return new Tensor(
- result->buf(),
+ return Tensor(
+ result.buf(),
alloc<tensorexpr::Block>(std::vector<StmtPtr>(
- {max->stmt(),
- e->stmt(),
- sum->stmt(),
- log_sum->stmt(),
- result->stmt()})));
+ {max.stmt(), e.stmt(), sum.stmt(), log_sum.stmt(), result.stmt()})));
}
} // namespace tensorexpr
namespace jit {
namespace tensorexpr {
-Tensor* computeSoftmax(
+Tensor computeSoftmax(
const std::vector<ArgValue>& inputs,
const std::vector<ExprHandle>& outputShape,
bool log_softmax);
return s;
}
-Tensor* Compute(
+Tensor Compute(
const std::string& name,
const std::vector<DimArg>& dim_args,
const std::function<ExprHandle(const std::vector<VarHandle>&)>& body_func) {
unpack_dim_args(dim_args, &dims, &args);
ExprPtr body = body_func(VarVectorToVarHandleVector(args)).node();
BufPtr buf = alloc<Buf>(name, dims, body->dtype());
- return new Tensor(buf, args, body);
+ return Tensor(buf, args, body);
}
-Tensor* Compute(
+Tensor Compute(
const std::string& name,
const std::vector<DimArg>& dim_args,
const std::function<ExprHandle(const VarHandle&)>& body_func) {
unpack_dim_args(dim_args, &dims, &args);
ExprPtr body = body_func(VarHandle(args[0])).node();
BufPtr buf = alloc<Buf>(name, dims, body->dtype());
- return new Tensor(buf, args, body);
+ return Tensor(buf, args, body);
}
-Tensor* Compute(
+Tensor Compute(
const std::string& name,
const std::vector<DimArg>& dim_args,
const std::function<ExprHandle(const VarHandle&, const VarHandle&)>&
unpack_dim_args(dim_args, &dims, &args);
ExprPtr body = body_func(VarHandle(args[0]), VarHandle(args[1])).node();
BufPtr buf = alloc<Buf>(name, dims, body->dtype());
- return new Tensor(buf, args, body);
+ return Tensor(buf, args, body);
}
-Tensor* Compute(
+Tensor Compute(
const std::string& name,
const std::vector<DimArg>& dim_args,
const std::function<
body_func(VarHandle(args[0]), VarHandle(args[1]), VarHandle(args[2]))
.node();
BufPtr buf = alloc<Buf>(name, dims, body->dtype());
- return new Tensor(buf, args, body);
+ return Tensor(buf, args, body);
}
-Tensor* Compute(
+Tensor Compute(
const std::string& name,
const std::vector<DimArg>& dim_args,
const std::function<ExprHandle(
VarHandle(args[3]))
.node();
BufPtr buf = alloc<Buf>(name, dims, body->dtype());
- return new Tensor(buf, args, body);
+ return Tensor(buf, args, body);
}
-Tensor* Reduce(
+Tensor Reduce(
const std::string& name,
const std::vector<DimArg>& dim_args,
const Reducer& reducer,
reduce_args);
}
-Tensor* Reduce(
+Tensor Reduce(
const std::string& name,
const std::vector<DimArg>& dim_args,
const Reducer& reducer,
reduce_args);
}
-Tensor* Reduce(
+Tensor Reduce(
const std::string& name,
const std::vector<DimArg>& dim_args,
const Reducer& reducer,
- Tensor* tensor,
+ Tensor tensor,
const std::vector<DimArg>& reduce_args) {
return Reduce(
name,
dim_args,
reducer,
- [&](ParameterList& p) { return tensor->load(p); },
+ [&](ParameterList& p) { return tensor.load(p); },
reduce_args);
}
namespace jit {
namespace tensorexpr {
-class TORCH_API Tensor : KernelScopedObject {
+class TORCH_API Tensor {
public:
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
Tensor(BufPtr buf, const std::vector<VarPtr>& args, ExprPtr body)
}
template <typename T>
- inline ExprHandle load(const std::vector<T>& args);
+ inline ExprHandle load(const std::vector<T>& args) const;
template <typename... Ts>
- inline ExprHandle load(const Ts&... ts);
+ inline ExprHandle load(const Ts&... ts) const;
private:
StmtPtr constructStmt(
std::vector<ExprPtr> strides_;
};
-TORCH_API Tensor* Compute(
+TORCH_API Tensor Compute(
const std::string& func_name,
const std::vector<DimArg>& dim_args,
const std::function<ExprHandle(const VarHandle&)>& body_func);
-TORCH_API Tensor* Compute(
+TORCH_API Tensor Compute(
const std::string& func_name,
const std::vector<DimArg>& dim_args,
const std::function<ExprHandle(const VarHandle&, const VarHandle&)>&
body_func);
-TORCH_API Tensor* Compute(
+TORCH_API Tensor Compute(
const std::string& func_name,
const std::vector<DimArg>& dim_args,
const std::function<
ExprHandle(const VarHandle&, const VarHandle&, const VarHandle&)>&
body_func);
-TORCH_API Tensor* Compute(
+TORCH_API Tensor Compute(
const std::string& func_name,
const std::vector<DimArg>& dim_args,
const std::function<ExprHandle(
const VarHandle&,
const VarHandle&,
const VarHandle&)>& body_func);
-TORCH_API Tensor* Compute(
+TORCH_API Tensor Compute(
const std::string& func_name,
const std::vector<DimArg>& dim_args,
const std::function<ExprHandle(const std::vector<VarHandle>&)>& body_func);
// Handle reductions over a Reducer and a body_func which produces values.
template <typename InitFunc, typename BodyFunc>
-Tensor* Reduce(
+Tensor Reduce(
const std::string& func_name,
const std::vector<DimArg>& dim_args,
const Reducer& reducer,
.node();
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
BufPtr func_result = alloc<Buf>(func_name, dims, body->dtype());
- return new Tensor(func_result, vars, body);
+ return Tensor(func_result, vars, body);
}
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
ReduceOpPtr reduce_op = reducer(func_result, body, output_args, reduce_vars);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
- Tensor* t =
- new Tensor(func_result, vars, reduce_dims, reduce_vars, reduce_op);
+ Tensor t = Tensor(func_result, vars, reduce_dims, reduce_vars, reduce_op);
return t;
}
template <typename BodyFunc>
-Tensor* Reduce(
+Tensor Reduce(
const std::string& func_name,
const std::vector<DimArg>& dim_args,
const Reducer& reducer,
// Overload which allows inline lambda functions for the body_func.
template <typename BodyFunc>
-Tensor* Reduce(
+Tensor Reduce(
const std::string& func_name,
const std::vector<DimArg>& dim_args,
const Reducer& reducer,
}
// Overload for the common case of all dimensions of a Placeholder.
-TORCH_API Tensor* Reduce(
+TORCH_API Tensor Reduce(
const std::string& func_name,
const std::vector<DimArg>& dim_args,
const Reducer& reducer,
const Placeholder& buffer,
const std::vector<DimArg>& reduce_args);
-TORCH_API Tensor* Reduce(
+TORCH_API Tensor Reduce(
const std::string& name,
const std::vector<DimArg>& dim_args,
const Reducer& reducer,
// Overload for the common case of all dimensions of a previously Computed
// Tensor.
-TORCH_API Tensor* Reduce(
+TORCH_API Tensor Reduce(
const std::string& func_name,
const std::vector<DimArg>& dim_args,
const Reducer& reducer,
- Tensor* tensor,
+ Tensor tensor,
const std::vector<DimArg>& reduce_args);
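// A usage sketch for this overload (M and N are assumed integer
// dimensions): sum a previously Computed Tensor over its second axis.
Tensor t = Compute(
    "t", {{M, "m"}, {N, "n"}},
    [](const VarHandle& m, const VarHandle& n) { return m * n; });
Tensor s = Reduce("s", {{M, "m"}}, Sum(), t, {{N, "n"}});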
template <typename... Ts>
-inline ExprHandle Tensor::load(const Ts&... ts) {
+inline ExprHandle Tensor::load(const Ts&... ts) const {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::vector<ExprHandle> params({ExprHandle(ts)...});
return Load::make(BufHandle(this->buf()), params);
}
template <typename T>
-inline ExprHandle Tensor::load(const std::vector<T>& args) {
+inline ExprHandle Tensor::load(const std::vector<T>& args) const {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
std::vector<ExprHandle> params(args.begin(), args.end());
return Load::make(BufHandle(this->buf()), params);
const std::vector<ExprHandle>& args,
const ExprHandle& val) { return self.store(args, val); })
.def("data", [](Placeholder& self) { return BufHandle(self.data()); });
- py::class_<Tensor, std::unique_ptr<Tensor, py::nodelete>>(te, "Tensor")
- .def(py::init(
- [](BufHandle& b, StmtPtr s) { return new Tensor(b.node(), s); }))
+ py::class_<Tensor>(te, "Tensor")
+ .def(
+ py::init([](BufHandle& b, StmtPtr s) { return Tensor(b.node(), s); }))
.def(
"load",
[](Tensor& self, const std::vector<ExprHandle>& v) {
[](const std::string& func_name,
const std::vector<DimArg>& dim_args,
const Reducer& reducer,
- Tensor* buffer,
+ Tensor buffer,
const std::vector<DimArg>& reduce_args) {
return Reduce(func_name, dim_args, reducer, buffer, reduce_args);
},
.def(py::init(&ExternalCall::make));
py::class_<LoopNest>(te, "LoopNest")
- .def(py::init<const std::vector<Tensor*>&>())
+ .def(py::init<const std::vector<Tensor>&>())
.def(py::init([](StmtPtr s, const std::vector<BufHandle>& bufs) {
std::unordered_set<BufPtr> buf_nodes;
for (auto& buf : bufs) {
.def("prepare_for_codegen", &LoopNest::prepareForCodegen)
.def(
"get_loop_body_for",
- [](const LoopNest& self, Tensor* t) {
- return self.getLoopBodyFor(t);
- },
+ [](const LoopNest& self, Tensor t) { return self.getLoopBodyFor(t); },
py::return_value_policy::reference)
.def(
"get_loop_body_for",
py::return_value_policy::reference)
.def(
"get_loops_for",
- [](const LoopNest& self, Tensor* t) {
+ [](const LoopNest& self, Tensor t) {
return self.getLoopStmtsFor(t);
},
py::return_value_policy::reference)
py::class_<CodeGen::BufferArg>(te, "BufferArg")
.def(py::init<const Placeholder&>())
- .def(py::init<Tensor*>())
+ .def(py::init<Tensor>())
.def(py::init<const VarHandle&>())
.def(py::init<const BufHandle&>());
py::implicitly_convertible<Placeholder, CodeGen::BufferArg>();
- py::implicitly_convertible<Tensor*, CodeGen::BufferArg>();
+ py::implicitly_convertible<Tensor, CodeGen::BufferArg>();
py::implicitly_convertible<VarHandle, CodeGen::BufferArg>();
py::implicitly_convertible<BufHandle, CodeGen::BufferArg>();