From 62d02f2b577c223f94d0b190df3f158cd985c221 Mon Sep 17 00:00:00 2001 From: Mikhail Zolotukhin Date: Tue, 24 Aug 2021 00:29:22 -0700 Subject: [PATCH] [TensorExpr] Make 'Tensor' a value type. (#63586) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/63586 This is another commit in the transition away from KernelArena memory management. Tensor is essentially just a pair of a Buf and a Stmt, and we don't need to dynamically allocate it at all - it's cheap to pass it by value, and that's what we're switching to in this commit. After this change nothing uses KernelScope/KernelArena and they can be safely removed. Differential Revision: D30429114 Test Plan: Imported from OSS Reviewed By: navahgar Pulled By: ZolotukhinM fbshipit-source-id: f90b859cfe863692b7beffbe9bd0e4143df1e819 --- benchmarks/cpp/tensorexpr/bench_approx.cpp | 20 +- benchmarks/cpp/tensorexpr/bench_batchnorm.cpp | 4 +- benchmarks/cpp/tensorexpr/bench_compile.cpp | 40 +- benchmarks/cpp/tensorexpr/bench_concat.cpp | 4 +- benchmarks/cpp/tensorexpr/bench_gemm.cpp | 12 +- benchmarks/cpp/tensorexpr/bench_parallel.cpp | 2 +- benchmarks/cpp/tensorexpr/bench_reduce.cpp | 16 +- test/cpp/tensorexpr/test_approx.cpp | 4 +- test/cpp/tensorexpr/test_boundsinference.cpp | 190 +++---- test/cpp/tensorexpr/test_conv.cpp | 8 +- test/cpp/tensorexpr/test_cuda.cpp | 68 +-- test/cpp/tensorexpr/test_external_calls.cpp | 48 +- test/cpp/tensorexpr/test_ir_printer.cpp | 14 +- test/cpp/tensorexpr/test_kernel.cpp | 2 +- test/cpp/tensorexpr/test_llvm.cpp | 64 +-- test/cpp/tensorexpr/test_loopnest.cpp | 645 +++++++++++----------- test/cpp/tensorexpr/test_memdependency.cpp | 68 ++- test/cpp/tensorexpr/test_ops.cpp | 4 +- test/cpp/tensorexpr/test_reductions.cpp | 230 ++++---- test/cpp/tensorexpr/test_simplify.cpp | 2 +- test/cpp/tensorexpr/tutorial.cpp | 30 +- torch/csrc/jit/runtime/static/ops.cpp | 14 +- torch/csrc/jit/tensorexpr/codegen.h | 2 +- torch/csrc/jit/tensorexpr/ir_printer.cpp | 17 +- torch/csrc/jit/tensorexpr/ir_printer.h | 4 +- torch/csrc/jit/tensorexpr/kernel.cpp | 62 +-- torch/csrc/jit/tensorexpr/kernel.h | 10 +- torch/csrc/jit/tensorexpr/loopnest.cpp | 22 +- torch/csrc/jit/tensorexpr/loopnest.h | 14 +- torch/csrc/jit/tensorexpr/operators/conv2d.cpp | 18 +- torch/csrc/jit/tensorexpr/operators/conv2d.h | 8 +- torch/csrc/jit/tensorexpr/operators/matmul.cpp | 8 +- torch/csrc/jit/tensorexpr/operators/matmul.h | 4 +- torch/csrc/jit/tensorexpr/operators/norm.cpp | 2 +- torch/csrc/jit/tensorexpr/operators/norm.h | 2 +- torch/csrc/jit/tensorexpr/operators/reduction.cpp | 10 +- torch/csrc/jit/tensorexpr/operators/reduction.h | 6 +- torch/csrc/jit/tensorexpr/operators/softmax.cpp | 31 +- torch/csrc/jit/tensorexpr/operators/softmax.h | 2 +- torch/csrc/jit/tensorexpr/tensor.cpp | 30 +- torch/csrc/jit/tensorexpr/tensor.h | 39 +- torch/csrc/jit/tensorexpr/tensorexpr_init.cpp | 20 +- 42 files changed, 881 insertions(+), 919 deletions(-) diff --git a/benchmarks/cpp/tensorexpr/bench_approx.cpp b/benchmarks/cpp/tensorexpr/bench_approx.cpp index 6e31697..92c2640 100644 --- a/benchmarks/cpp/tensorexpr/bench_approx.cpp +++ b/benchmarks/cpp/tensorexpr/bench_approx.cpp @@ -10,14 +10,14 @@ using namespace torch::jit; using namespace torch::jit::tensorexpr; -void vectorize(tensorexpr::LoopNest* ln, tensorexpr::Tensor* target, int width) { +void vectorize(tensorexpr::LoopNest* ln, tensorexpr::Tensor target, int width) { auto loops = ln->getLoopStmtsFor(target); ForPtr inner, tail; ln->splitWithTail(loops[0], width, &inner, &tail); ln->vectorize(inner); } -void 
optimizePointwise(tensorexpr::LoopNest* ln, tensorexpr::Tensor* target) { +void optimizePointwise(tensorexpr::LoopNest* ln, tensorexpr::Tensor target) { std::vector loops = ln->getLoopStmtsFor(target); ForPtr inner, tail; ln->splitWithTail(loops[0], 16 * 8, &inner, &tail); @@ -33,7 +33,7 @@ static void relu_nnc(benchmark::State& state) { auto N = VarHandle("N", kInt); Placeholder A("A", kFloat, {N}); auto clamp = 0; - torch::jit::tensorexpr::Tensor* B = Compute("B", {N}, [&](const VarHandle& i){ + torch::jit::tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i){ auto A_elem = [&]() { auto elem = A.load(i); auto min = FloatImm::make(clamp); @@ -67,7 +67,7 @@ static void log_nnc_sleef(benchmark::State& state) { KernelScope ks; auto N = VarHandle("N", kInt); Placeholder A("A", kFloat, {N}); - torch::jit::tensorexpr::Tensor* B = + torch::jit::tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) { return log(A.load(i)); }); @@ -97,7 +97,7 @@ static void log_nnc_fast(benchmark::State& state) { KernelScope ks; auto N = VarHandle("N", kInt); Placeholder A("A", kFloat, {N}); - torch::jit::tensorexpr::Tensor* B = + torch::jit::tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) { return fast_log(A.load(i)); }); @@ -127,7 +127,7 @@ static void log_nnc_vml(benchmark::State& state) { KernelScope ks; auto N = VarHandle("N", kInt); Placeholder A("A", kFloat, {N}); - torch::jit::tensorexpr::Tensor* B = + torch::jit::tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) { return log_vml(A.load(i)); }); @@ -168,7 +168,7 @@ static void logit_nnc_sleef(benchmark::State& state) { auto N = VarHandle("N", kInt); Placeholder A("A", kFloat, {N}); auto clamp = 1e-6f; - tensorexpr::Tensor* B = Compute("B", {N}, [&](const VarHandle& i) { + tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) { auto A_elem = [&]() { auto elem = A.load(i); auto min = FloatImm::make(clamp); @@ -205,7 +205,7 @@ static void logit_nnc_fast(benchmark::State& state) { auto N = VarHandle("N", kInt); Placeholder A("A", kFloat, {N}); auto clamp = 1e-6f; - tensorexpr::Tensor* B = Compute("B", {N}, [&](const VarHandle& i) { + tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) { auto A_elem = [&]() { auto elem = A.load(i); auto min = FloatImm::make(clamp); @@ -242,7 +242,7 @@ static void logit_nnc_vml(benchmark::State& state) { auto N = VarHandle("N", kInt); Placeholder A("A", kFloat, {N}); auto clamp = 1e-6f; - tensorexpr::Tensor* B = Compute("B", {N}, [&](const VarHandle& i) { + tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) { auto A_elem = [&]() { auto elem = A.load(i); auto min = FloatImm::make(clamp); @@ -319,7 +319,7 @@ static void tanh_nnc_fast(benchmark::State& state) { KernelScope ks; auto N = VarHandle("N", kInt); Placeholder A("A", kFloat, {N}); - torch::jit::tensorexpr::Tensor* B = + torch::jit::tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) { return fast_tanh(A.load(i)); }); diff --git a/benchmarks/cpp/tensorexpr/bench_batchnorm.cpp b/benchmarks/cpp/tensorexpr/bench_batchnorm.cpp index 872594e..85bf9d3 100644 --- a/benchmarks/cpp/tensorexpr/bench_batchnorm.cpp +++ b/benchmarks/cpp/tensorexpr/bench_batchnorm.cpp @@ -84,7 +84,7 @@ BENCHMARK_DEFINE_F(BatchNorm, NNC)(benchmark::State& state) { VarHandle eps("eps", kFloat); using axis = const VarHandle&; - Tensor* output = Compute( + Tensor output = Compute( "output", {{N_, "N"}, {C_, "C"}, {H_, "H"}, {W_, "W"}}, [&](axis n, axis c, axis h, axis w) { @@ -147,7 +147,7 @@ 
BENCHMARK_DEFINE_F(BatchNorm, NNCRelu)(benchmark::State& state) { VarHandle eps("eps", kFloat); using axis = const VarHandle&; - Tensor* output = Compute( + Tensor output = Compute( "output", {{N_, "N"}, {C_, "C"}, {H_, "H"}, {W_, "W"}}, [&](axis n, axis c, axis h, axis w) { diff --git a/benchmarks/cpp/tensorexpr/bench_compile.cpp b/benchmarks/cpp/tensorexpr/bench_compile.cpp index 245d5d8..50d54e5 100644 --- a/benchmarks/cpp/tensorexpr/bench_compile.cpp +++ b/benchmarks/cpp/tensorexpr/bench_compile.cpp @@ -13,24 +13,24 @@ static void BM_CompileSwish(benchmark::State& state) { te::KernelScope ks; te::VarHandle n("n", te::kInt); te::Placeholder A(te::BufHandle("A", {N}, te::kFloat)); - te::Tensor* relu = te::Compute("relu", {{n, "n"}}, [&](const te::VarHandle& i) { + te::Tensor relu = te::Compute("relu", {{n, "n"}}, [&](const te::VarHandle& i) { return te::Max::make(A.load(i), 0.f, false); }); - te::Tensor* min6 = te::Compute("min6", {{n, "n"}}, [&](const te::VarHandle& i) { - return te::Min::make(relu->load(i), 6.f, false); + te::Tensor min6 = te::Compute("min6", {{n, "n"}}, [&](const te::VarHandle& i) { + return te::Min::make(relu.load(i), 6.f, false); }); - te::Tensor* plus3 = te::Compute("plus3", {{n, "n"}}, [&](const te::VarHandle& i) { - return min6->load(i) + 3.f; + te::Tensor plus3 = te::Compute("plus3", {{n, "n"}}, [&](const te::VarHandle& i) { + return min6.load(i) + 3.f; }); - te::Tensor* times = te::Compute("times", {{n, "n"}}, [&](const te::VarHandle& i) { - return A.load(i) * plus3->load(i); + te::Tensor times = te::Compute("times", {{n, "n"}}, [&](const te::VarHandle& i) { + return A.load(i) * plus3.load(i); }); - te::Tensor* sixth = te::Compute("sixth", {{n, "n"}}, [&](const te::VarHandle& i) { - return times->load(i) * 1.f / 6.f; + te::Tensor sixth = te::Compute("sixth", {{n, "n"}}, [&](const te::VarHandle& i) { + return times.load(i) * 1.f / 6.f; }); te::LoopNest nest({sixth}, {relu, min6, plus3, times, sixth}); for (auto tensor : {relu, min6, plus3, times}) { - nest.computeInline(tensor->buf()); + nest.computeInline(tensor.buf()); } nest.prepareForCodegen(); te::StmtPtr s = te::IRSimplifier::simplify(nest.root_stmt()); @@ -43,24 +43,24 @@ static void BM_CompileSwishLLVMOnly(benchmark::State& state) { te::KernelScope ks; te::VarHandle n("n", te::kInt); te::Placeholder A(te::BufHandle("A", {N}, te::kFloat)); - te::Tensor* relu = te::Compute("relu", {{n, "n"}}, [&](const te::VarHandle& i) { + te::Tensor relu = te::Compute("relu", {{n, "n"}}, [&](const te::VarHandle& i) { return te::Max::make(A.load(i), 0.f, false); }); - te::Tensor* min6 = te::Compute("min6", {{n, "n"}}, [&](const te::VarHandle& i) { - return te::Min::make(relu->load(i), 6.f, false); + te::Tensor min6 = te::Compute("min6", {{n, "n"}}, [&](const te::VarHandle& i) { + return te::Min::make(relu.load(i), 6.f, false); }); - te::Tensor* plus3 = te::Compute("plus3", {{n, "n"}}, [&](const te::VarHandle& i) { - return min6->load(i) + 3.f; + te::Tensor plus3 = te::Compute("plus3", {{n, "n"}}, [&](const te::VarHandle& i) { + return min6.load(i) + 3.f; }); - te::Tensor* times = te::Compute("times", {{n, "n"}}, [&](const te::VarHandle& i) { - return A.load(i) * plus3->load(i); + te::Tensor times = te::Compute("times", {{n, "n"}}, [&](const te::VarHandle& i) { + return A.load(i) * plus3.load(i); }); - te::Tensor* sixth = te::Compute("sixth", {{n, "n"}}, [&](const te::VarHandle& i) { - return times->load(i) * 1.f / 6.f; + te::Tensor sixth = te::Compute("sixth", {{n, "n"}}, [&](const te::VarHandle& i) { + return 
times.load(i) * 1.f / 6.f; }); te::LoopNest nest({sixth}, {relu, min6, plus3, times, sixth}); for (auto tensor : {relu, min6, plus3, times}) { - nest.computeInline(tensor->buf()); + nest.computeInline(tensor.buf()); } nest.prepareForCodegen(); te::StmtPtr s = te::IRSimplifier::simplify(nest.root_stmt()); diff --git a/benchmarks/cpp/tensorexpr/bench_concat.cpp b/benchmarks/cpp/tensorexpr/bench_concat.cpp index cb9aa84..856065d 100644 --- a/benchmarks/cpp/tensorexpr/bench_concat.cpp +++ b/benchmarks/cpp/tensorexpr/bench_concat.cpp @@ -60,7 +60,7 @@ class ConcatBench : public benchmark::Fixture { {input_sizes_[i][0], input_sizes_[i][1]})); } - Tensor* output = Compute( + Tensor output = Compute( "aten_cat", {{output_size_[0], "M"}, {output_size_[1], "N"}}, [&](const VarHandle& m, const VarHandle& n) { @@ -147,7 +147,7 @@ class ConcatBench : public benchmark::Fixture { for_stmts[i] = for_st; cumulative_input_sizes += input_sizes_[i][1]; } - auto output = new Tensor(output_buf, alloc(for_stmts)); + auto output = Tensor(output_buf, alloc(for_stmts)); LoopNest nest({output}); nest.prepareForCodegen(); diff --git a/benchmarks/cpp/tensorexpr/bench_gemm.cpp b/benchmarks/cpp/tensorexpr/bench_gemm.cpp index 7ebaa87..8646e97 100644 --- a/benchmarks/cpp/tensorexpr/bench_gemm.cpp +++ b/benchmarks/cpp/tensorexpr/bench_gemm.cpp @@ -44,7 +44,7 @@ BENCHMARK_DEFINE_F(Gemm, TensorExprNoopt)(benchmark::State& state) { te::Placeholder AP(te::BufHandle("A", {M, K}, te::kFloat)); te::Placeholder BP(te::BufHandle("B", {K, N}, te::kFloat)); - te::Tensor* CT = te::Reduce( + te::Tensor CT = te::Reduce( "gemm", {{M, "M"}, {N, "N"}}, te::Sum(), @@ -68,7 +68,7 @@ BENCHMARK_DEFINE_F(Gemm, TensorExprTile32x32)(benchmark::State& state) { te::Placeholder AP(te::BufHandle("A", {M, K}, te::kFloat)); te::Placeholder BP(te::BufHandle("B", {K, N}, te::kFloat)); - te::Tensor* CT = te::Reduce( + te::Tensor CT = te::Reduce( "gemm", {{M, "M"}, {N, "N"}}, te::Sum(), @@ -128,7 +128,7 @@ BENCHMARK_DEFINE_F(Gemm, TensorExprTile4x16)(benchmark::State& state) { te::Placeholder AP(te::BufHandle("A", {M, K}, te::kFloat)); te::Placeholder BP(te::BufHandle("B", {K, N}, te::kFloat)); - te::Tensor* CT = te::Reduce( + te::Tensor CT = te::Reduce( "gemm", {{M, "M"}, {N, "N"}}, te::Sum(), @@ -188,7 +188,7 @@ BENCHMARK_DEFINE_F(Gemm, TensorExprTile4x16VecUnroll)(benchmark::State& state) { te::Placeholder AP(te::BufHandle("A", {M, K}, te::kFloat)); te::Placeholder BP(te::BufHandle("B", {K, N}, te::kFloat)); - te::Tensor* CT = te::Reduce( + te::Tensor CT = te::Reduce( "gemm", {{M, "M"}, {N, "N"}}, te::Sum(), @@ -256,7 +256,7 @@ BENCHMARK_DEFINE_F(Gemm, TensorExprTile4x16Cache)(benchmark::State& state) { te::Placeholder AP(te::BufHandle("A", {M, K}, te::kFloat)); te::Placeholder BP(te::BufHandle("B", {K, N}, te::kFloat)); - te::Tensor* CT = te::Reduce( + te::Tensor CT = te::Reduce( "gemm", {{M, "M"}, {N, "N"}}, te::Sum(), @@ -302,7 +302,7 @@ BENCHMARK_DEFINE_F(Gemm, TensorExprTile4x16Cache)(benchmark::State& state) { } { auto const& loops = loop.getLoopStmtsFor(CT); - loop.cacheAccesses(CT->buf(), "C_regs", loops[2]); + loop.cacheAccesses(CT.buf(), "C_regs", loops[2]); } loop.prepareForCodegen(); diff --git a/benchmarks/cpp/tensorexpr/bench_parallel.cpp b/benchmarks/cpp/tensorexpr/bench_parallel.cpp index 966c9e2..847b66d 100644 --- a/benchmarks/cpp/tensorexpr/bench_parallel.cpp +++ b/benchmarks/cpp/tensorexpr/bench_parallel.cpp @@ -38,7 +38,7 @@ BENCHMARK_DEFINE_F(ParallelAdd, Simple)(benchmark::State& state) { KernelScope kernel_scope; Placeholder 
a_buf("a", kFloat, {M}); Placeholder b_buf("b", kFloat, {M}); - Tensor* c_tensor = Compute( + Tensor c_tensor = Compute( "c", {{M, "m"}}, [&](const VarHandle& m) { return a_buf.load(m) + b_buf.load(m); }); diff --git a/benchmarks/cpp/tensorexpr/bench_reduce.cpp b/benchmarks/cpp/tensorexpr/bench_reduce.cpp index be5dcc8..9d35701 100644 --- a/benchmarks/cpp/tensorexpr/bench_reduce.cpp +++ b/benchmarks/cpp/tensorexpr/bench_reduce.cpp @@ -222,7 +222,7 @@ BENCHMARK_DEFINE_F(Reduce1D, TeNaive)(benchmark::State& state) { int M = A.numel(); te::Placeholder AP(te::BufHandle("A", {M}, te::kFloat)); - te::Tensor* BT = te::Reduce( + te::Tensor BT = te::Reduce( "reduce_full", {{1, "N"}}, te::Sum(), @@ -255,7 +255,7 @@ BENCHMARK_DEFINE_F(Reduce1D, TeSplitTail)(benchmark::State& state) { int M = A.numel(); te::Placeholder AP(te::BufHandle("A", {M}, te::kFloat)); - te::Tensor* BT = te::Reduce( + te::Tensor BT = te::Reduce( "reduce_full", {{1, "N"}}, te::Sum(), @@ -296,7 +296,7 @@ BENCHMARK_DEFINE_F(Reduce1D, TeSplitMask)(benchmark::State& state) { int M = A.numel(); te::Placeholder AP(te::BufHandle("A", {M}, te::kFloat)); - te::Tensor* BT = te::Reduce( + te::Tensor BT = te::Reduce( "reduce_full", {{1, "N"}}, te::Sum(), @@ -339,7 +339,7 @@ BENCHMARK_DEFINE_F(Reduce1D, TeRfactorV1)(benchmark::State& state) { TORCH_CHECK(M % kChunkSize == 0); te::Placeholder AP(te::BufHandle("A", {M}, te::kFloat)); - te::Tensor* BT = te::Reduce( + te::Tensor BT = te::Reduce( "reduce_full", {}, te::Sum(), @@ -359,7 +359,7 @@ BENCHMARK_DEFINE_F(Reduce1D, TeRfactorV1)(benchmark::State& state) { loop.reorderAxis(mo, mi); loops = loop.getLoopStmtsFor(BT); - auto bt_body = loop.getAllWritesToBuf(BT->buf())[1]; + auto bt_body = loop.getAllWritesToBuf(BT.buf())[1]; TORCH_CHECK(loop.rfactor(bt_body, loops.at(0), &rfac_buf)); loop.reorderAxis(loops.at(0), loops.at(1)); @@ -390,7 +390,7 @@ BENCHMARK_DEFINE_F(Reduce1D, Op)(benchmark::State& state) { const int kChunkSize = 8; te::Placeholder a("A", te::kFloat, {M}); - te::Tensor* b = te::computeSum({a.handle(), te::IntList({0}), false}, at::kFloat); + te::Tensor b = te::computeSum({a.handle(), te::IntList({0}), false}, at::kFloat); te::LoopNest nest({b}); auto loops = nest.getLoopStmtsFor(b); @@ -453,7 +453,7 @@ BENCHMARK_DEFINE_F(Reduce2DCol, OpSchedule)(benchmark::State& state) { te::KernelScope ks; constexpr int kCacheSize = 1 << 12; te::Placeholder a("A", te::kFloat, {M, N}); - te::Tensor* b = te::computeSum({a.handle(), te::IntList({0}), false}, at::kFloat); + te::Tensor b = te::computeSum({a.handle(), te::IntList({0}), false}, at::kFloat); te::LoopNest nest({b}); auto sch = state.range(2); @@ -560,7 +560,7 @@ BENCHMARK_DEFINE_F(Reduce2DRow, OpSchedule)(benchmark::State& state) { te::KernelScope ks; constexpr int kChunkSize = 8; te::Placeholder a("A", te::kFloat, {M, N}); - te::Tensor* b = te::computeSum({a.handle(), te::IntList({1}), false}, at::kFloat); + te::Tensor b = te::computeSum({a.handle(), te::IntList({1}), false}, at::kFloat); te::LoopNest nest({b}); auto sch = state.range(2); diff --git a/test/cpp/tensorexpr/test_approx.cpp b/test/cpp/tensorexpr/test_approx.cpp index d761645..2a4ce94 100644 --- a/test/cpp/tensorexpr/test_approx.cpp +++ b/test/cpp/tensorexpr/test_approx.cpp @@ -11,7 +11,7 @@ using namespace torch::indexing; namespace te = torch::jit::tensorexpr; -static void vectorize(te::LoopNest* ln, te::Tensor* target, int width) { +static void vectorize(te::LoopNest* ln, te::Tensor target, int width) { auto loops = ln->getLoopStmtsFor(target); te::ForPtr inner, tail; 
ln->splitWithTail(loops[0], width, &inner, &tail); @@ -33,7 +33,7 @@ TEST(Approx, log_vml) { te::KernelScope ks; te::VarHandle N("N", te::kInt); te::Placeholder A("A", te::kFloat, {N}); - te::Tensor* B = te::Compute( + te::Tensor B = te::Compute( "B", {N}, [&](const te::VarHandle& i) { return log_vml(A.load(i)); }); te::LoopNest ln({B}); diff --git a/test/cpp/tensorexpr/test_boundsinference.cpp b/test/cpp/tensorexpr/test_boundsinference.cpp index fcfa8ce..d038665 100644 --- a/test/cpp/tensorexpr/test_boundsinference.cpp +++ b/test/cpp/tensorexpr/test_boundsinference.cpp @@ -49,7 +49,7 @@ TEST(BoundsInference, _1) { KernelScope kernel_scope; ExprHandle n(100); Placeholder a(BufHandle("a", {n}, kFloat)); - Tensor* b = + Tensor b = Compute("b", {{n, "i"}}, [&](const VarHandle& i) { return a.load(i); }); LoopNest l({b}); auto bounds_info = inferBounds(l.root_stmt()); @@ -60,9 +60,9 @@ TEST(BoundsInference, _1) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{0, 99}}); - ASSERT_EQ(bounds_info.at(b->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 99}}); + ASSERT_EQ(bounds_info.at(b.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 99}}); } TEST(BoundsInference, _2) { @@ -74,7 +74,7 @@ TEST(BoundsInference, _2) { KernelScope kernel_scope; VarHandle n("n", kInt); Placeholder a(BufHandle("a", {n}, kFloat)); - Tensor* b = + Tensor b = Compute("b", {{n, "i"}}, [&](const VarHandle& i) { return a.load(i); }); LoopNest l({b}); auto bounds_info = inferBounds(l.root_stmt()); @@ -85,9 +85,9 @@ TEST(BoundsInference, _2) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{0, -1}}); - ASSERT_EQ(bounds_info.at(b->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(b->buf())[0], {{0, -1}}); + ASSERT_EQ(bounds_info.at(b.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(b.buf())[0], {{0, -1}}); } TEST(BoundsInference, _3) { @@ -99,7 +99,7 @@ TEST(BoundsInference, _3) { KernelScope kernel_scope; ExprHandle n(100); Placeholder a(BufHandle("a", {n + 10}, kFloat)); - Tensor* b = Compute("b", {{n, "i"}}, [&](const VarHandle& i) { + Tensor b = Compute("b", {{n, "i"}}, [&](const VarHandle& i) { return a.load(i) * a.load(i + 10); }); LoopNest l({b}); @@ -111,9 +111,9 @@ TEST(BoundsInference, _3) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{0, 109}}); - ASSERT_EQ(bounds_info.at(b->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 99}}); + ASSERT_EQ(bounds_info.at(b.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 99}}); } TEST(BoundsInference, _4) { @@ -129,13 +129,13 @@ TEST(BoundsInference, _4) { ExprHandle W(320); ExprHandle H(200); Placeholder a(BufHandle("a", {H, W}, kFloat)); - Tensor* b = Compute( + Tensor b = Compute( "b", {{H, "y"}, {W, "x"}}, [&](const VarHandle& y, const VarHandle& x) { return x * y; }); - Tensor* c = Compute( + Tensor c = Compute( "c", {{H, "y"}, {W, "x"}}, [&](const VarHandle& y, const VarHandle& x) { - return a.load(y, x) * b->load(y, x); + return a.load(y, x) * b.load(y, x); }); LoopNest l({c}); std::vector loops 
= l.getLoopStmtsFor(c); @@ -149,13 +149,13 @@ TEST(BoundsInference, _4) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{0, 199}, {0, 319}}); - ASSERT_EQ(bounds_info.at(b->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kLoad); - verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 199}, {0, 319}}); + ASSERT_EQ(bounds_info.at(b.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad); + verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 199}, {0, 319}}); - ASSERT_EQ(bounds_info.at(c->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(c->buf())[0], {{0, 199}, {0, 319}}); + ASSERT_EQ(bounds_info.at(c.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(c.buf())[0], {{0, 199}, {0, 319}}); } { // Infer bounds on the inner loop scope @@ -166,13 +166,13 @@ TEST(BoundsInference, _4) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{-1, -1}, {0, 319}}); - ASSERT_EQ(bounds_info.at(b->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kLoad); - verifyConstBounds(bounds_info.at(b->buf())[0], {{-1, -1}, {0, 319}}); + ASSERT_EQ(bounds_info.at(b.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad); + verifyConstBounds(bounds_info.at(b.buf())[0], {{-1, -1}, {0, 319}}); - ASSERT_EQ(bounds_info.at(c->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(c->buf())[0], {{-1, -1}, {0, 319}}); + ASSERT_EQ(bounds_info.at(c.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(c.buf())[0], {{-1, -1}, {0, 319}}); } { // Infer bounds on the inner loop body's scope @@ -183,13 +183,13 @@ TEST(BoundsInference, _4) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{-1, -1}, {-1, -1}}); - ASSERT_EQ(bounds_info.at(b->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kLoad); - verifyConstBounds(bounds_info.at(b->buf())[0], {{-1, -1}, {-1, -1}}); + ASSERT_EQ(bounds_info.at(b.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad); + verifyConstBounds(bounds_info.at(b.buf())[0], {{-1, -1}, {-1, -1}}); - ASSERT_EQ(bounds_info.at(c->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(c->buf())[0], {{-1, -1}, {-1, -1}}); + ASSERT_EQ(bounds_info.at(c.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(c.buf())[0], {{-1, -1}, {-1, -1}}); } } @@ -208,7 +208,7 @@ TEST(BoundsInference, _5) { KernelScope kernel_scope; ExprHandle n(100); Placeholder a(BufHandle("a", {n}, kFloat)); - Tensor* b = + Tensor b = Compute("b", {{n, "i"}}, [&](const VarHandle& i) { return a.load(i); }); LoopNest l({b}); @@ -229,9 +229,9 @@ TEST(BoundsInference, _5) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{0, 95}}); - ASSERT_EQ(bounds_info.at(b->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 95}}); + ASSERT_EQ(bounds_info.at(b.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 95}}); } { // Verify inferred bounds for the tail loop @@ -242,9 +242,9 @@ 
TEST(BoundsInference, _5) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{96, 99}}); - ASSERT_EQ(bounds_info.at(b->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(b->buf())[0], {{96, 99}}); + ASSERT_EQ(bounds_info.at(b.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(b.buf())[0], {{96, 99}}); } } @@ -263,13 +263,13 @@ TEST(BoundsInference, _6) { ExprHandle CW(32); ExprHandle CH(20); Placeholder a(BufHandle("a", {H, W}, kFloat)); - Tensor* b = Compute( + Tensor b = Compute( "b", {{H, "y"}, {W, "x"}}, [&](const VarHandle& y, const VarHandle& x) { return x * y; }); - Tensor* c = Compute( + Tensor c = Compute( "c", {{CH, "y"}, {CW, "x"}}, [&](const VarHandle& y, const VarHandle& x) { - return a.load(y + 100, x + 100) * b->load(y * 2, x * 5); + return a.load(y + 100, x + 100) * b.load(y * 2, x * 5); }); LoopNest l({c}); std::vector loops = l.getLoopStmtsFor(c); @@ -283,13 +283,13 @@ TEST(BoundsInference, _6) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{100, 119}, {100, 131}}); - ASSERT_EQ(bounds_info.at(b->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kLoad); - verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 38}, {0, 155}}); + ASSERT_EQ(bounds_info.at(b.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad); + verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 38}, {0, 155}}); - ASSERT_EQ(bounds_info.at(c->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(c->buf())[0], {{0, 19}, {0, 31}}); + ASSERT_EQ(bounds_info.at(c.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(c.buf())[0], {{0, 19}, {0, 31}}); } { // Infer bounds on the inner loop scope @@ -300,13 +300,13 @@ TEST(BoundsInference, _6) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{-1, -1}, {100, 131}}); - ASSERT_EQ(bounds_info.at(b->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kLoad); - verifyConstBounds(bounds_info.at(b->buf())[0], {{-1, -1}, {0, 155}}); + ASSERT_EQ(bounds_info.at(b.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad); + verifyConstBounds(bounds_info.at(b.buf())[0], {{-1, -1}, {0, 155}}); - ASSERT_EQ(bounds_info.at(c->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(c->buf())[0], {{-1, -1}, {0, 31}}); + ASSERT_EQ(bounds_info.at(c.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(c.buf())[0], {{-1, -1}, {0, 31}}); } { // Infer bounds on the inner loop body's scope @@ -317,13 +317,13 @@ TEST(BoundsInference, _6) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{-1, -1}, {-1, -1}}); - ASSERT_EQ(bounds_info.at(b->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kLoad); - verifyConstBounds(bounds_info.at(b->buf())[0], {{-1, -1}, {-1, -1}}); + ASSERT_EQ(bounds_info.at(b.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kLoad); + verifyConstBounds(bounds_info.at(b.buf())[0], {{-1, -1}, {-1, -1}}); - ASSERT_EQ(bounds_info.at(c->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(c->buf())[0], {{-1, 
-1}, {-1, -1}}); + ASSERT_EQ(bounds_info.at(c.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(c.buf())[0], {{-1, -1}, {-1, -1}}); } } @@ -331,9 +331,9 @@ TEST(BoundsInference, Adjacent) { KernelScope kernel_scope; ExprHandle H(6); Placeholder a(BufHandle("a", {20}, kFloat)); - Tensor* b = + Tensor b = Compute("b", {{H, "x"}}, [&](const VarHandle& x) { return a.load(x); }); - Tensor* c = Compute( + Tensor c = Compute( "c", {{H, "x"}}, [&](const VarHandle& x) { return a.load(x + H); }); LoopNest l({b, c}); std::vector loops = NodeFinder::find(l.root_stmt()); @@ -348,9 +348,9 @@ TEST(BoundsInference, Adjacent) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{0, 5}}); - ASSERT_EQ(bounds_info.at(b->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 5}}); + ASSERT_EQ(bounds_info.at(b.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 5}}); } { // Infer bounds on the inner loop scope @@ -362,9 +362,9 @@ TEST(BoundsInference, Adjacent) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{6, 11}}); - ASSERT_EQ(bounds_info.at(c->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(c->buf())[0], {{0, 5}}); + ASSERT_EQ(bounds_info.at(c.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(c.buf())[0], {{0, 5}}); } { // Infer bounds on the high level program. @@ -377,24 +377,24 @@ TEST(BoundsInference, Adjacent) { ASSERT_EQ(bounds_info.at(a.data())[0].kind, kLoad); verifyConstBounds(bounds_info.at(a.data())[0], {{0, 11}}); - ASSERT_EQ(bounds_info.at(b->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(b->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(b->buf())[0], {{0, 5}}); + ASSERT_EQ(bounds_info.at(b.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(b.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(b.buf())[0], {{0, 5}}); - ASSERT_EQ(bounds_info.at(c->buf()).size(), 1); - ASSERT_EQ(bounds_info.at(c->buf())[0].kind, kStore); - verifyConstBounds(bounds_info.at(c->buf())[0], {{0, 5}}); + ASSERT_EQ(bounds_info.at(c.buf()).size(), 1); + ASSERT_EQ(bounds_info.at(c.buf())[0].kind, kStore); + verifyConstBounds(bounds_info.at(c.buf())[0], {{0, 5}}); } } TEST(BoundsInference, MultipleTopLoopLoad) { KernelScope kernel_scope; Placeholder a(BufHandle("a", {100}, kFloat)); - Tensor* b = + Tensor b = Compute("b", {{64, "x"}}, [&](const VarHandle& x) { return a.load(x); }); - Tensor* c = Compute( + Tensor c = Compute( "c", {{32, "x"}}, [&](const VarHandle& x) { return a.load(x + 10); }); - Tensor* d = Compute( + Tensor d = Compute( "d", {{96, "x"}}, [&](const VarHandle& x) { return a.load(x + 2); }); LoopNest l({b, c, d}); @@ -418,7 +418,7 @@ TEST(BoundsInference, MultipleTopLoopLoad) { // b, c, d only written. 
{ - auto bounds = bounds_info[b->buf()]; + auto bounds = bounds_info[b.buf()]; ASSERT_EQ(bounds.size(), 1); auto bound = bounds[0]; ASSERT_EQ(bound.kind, TensorAccessKind::kStore); @@ -426,7 +426,7 @@ TEST(BoundsInference, MultipleTopLoopLoad) { verifyConstBounds(bound, {{0, 63}}); } { - auto bounds = bounds_info[c->buf()]; + auto bounds = bounds_info[c.buf()]; ASSERT_EQ(bounds.size(), 1); auto bound = bounds[0]; ASSERT_EQ(bound.kind, TensorAccessKind::kStore); @@ -434,7 +434,7 @@ TEST(BoundsInference, MultipleTopLoopLoad) { verifyConstBounds(bound, {{0, 31}}); } { - auto bounds = bounds_info[d->buf()]; + auto bounds = bounds_info[d.buf()]; ASSERT_EQ(bounds.size(), 1); auto bound = bounds[0]; ASSERT_EQ(bound.kind, TensorAccessKind::kStore); @@ -506,24 +506,24 @@ TEST(BoundsInference, MultipleTopLoopStore) { TEST(BoundsInference, CacheReads) { KernelScope kernel_scope; - Tensor* A = Compute( + Tensor A = Compute( "A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) { return i * j; }); - Tensor* B = Compute( + Tensor B = Compute( "B", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i + 30, j + 3); + return A.load(i + 30, j + 3); }); - Tensor* C = Compute( + Tensor C = Compute( "C", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i + 10, j + 20) + A->load(i + 30, j + 40); + return A.load(i + 10, j + 20) + A.load(i + 30, j + 40); }); LoopNest l({B, C}); auto bounds_info_before = inferBounds(l.root_stmt()); StmtPtr j_loop = l.getLoopStmtsFor(B)[1]; - LoopNest::cacheAccesses(A->buf(), "A_local", j_loop); + LoopNest::cacheAccesses(A.buf(), "A_local", j_loop); auto bounds_info_after = inferBounds(l.root_stmt()); @@ -571,7 +571,7 @@ TEST(BoundsInference, CacheReads) { TEST(BoundsInference, Flattened) { KernelScope kernel_scope; - Tensor* b = Compute( + Tensor b = Compute( "b", {{3, "z"}, {4, "y"}, {5, "x"}}, [&](const VarHandle& z, const VarHandle& y, const VarHandle& x) { @@ -585,7 +585,7 @@ TEST(BoundsInference, Flattened) { // There's only one buffer. ASSERT_EQ(bounds_info.size(), 1); - auto& TABI = bounds_info[b->buf()][0]; + auto& TABI = bounds_info[b.buf()][0]; ASSERT_EQ(TABI.kind, TensorAccessKind::kStore); // Flattened bounds should have a single dimension. 
ASSERT_EQ(TABI.start.size(), 1); @@ -651,11 +651,11 @@ TEST(BoundsInference, GetPotentialHazards) { TEST(BoundsInference, GetPotentialHazardsLoopNoHazard) { KernelScope kernel_scope; - Tensor* A = Compute( + Tensor A = Compute( "A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) { return i * j; }); - Tensor* B = Compute( + Tensor B = Compute( "B", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) { return (i + 1) * (j + 1); }); @@ -679,13 +679,13 @@ TEST(BoundsInference, GetPotentialHazardsLoopNoHazard) { TEST(BoundsInference, GetPotentialHazardsLoopCall) { KernelScope kernel_scope; - Tensor* A = Compute( + Tensor A = Compute( "A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) { return i * j; }); - Tensor* B = Compute( + Tensor B = Compute( "B", {{64, "i"}, {64, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i, j) + 5; + return A.load(i, j) + 5; }); LoopNest l({A, B}); @@ -706,7 +706,7 @@ TEST(BoundsInference, GetPotentialHazardsLoopCall) { TEST(BoundsInference, GetPotentialHazardsLoopSplit) { KernelScope kernel_scope; - Tensor* A = Compute( + Tensor A = Compute( "A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) { return i * j; }); diff --git a/test/cpp/tensorexpr/test_conv.cpp b/test/cpp/tensorexpr/test_conv.cpp index 63881d0..293fbe2 100644 --- a/test/cpp/tensorexpr/test_conv.cpp +++ b/test/cpp/tensorexpr/test_conv.cpp @@ -30,7 +30,7 @@ TEST(Conv, DepthwiseConv2D) { te::Placeholder input("input", te::kFloat, {N, C, H, W}); te::Placeholder weight("weight", te::kFloat, {K, CperG, R, S}); te::Placeholder bias("bias", te::kFloat, {K}); - te::Tensor* output = te::conv2d_depthwise( + te::Tensor output = te::conv2d_depthwise( input.handle(), weight.handle(), bias.handle(), kStride, kPad, kGroups); te::LoopNest loop({output}); @@ -61,7 +61,7 @@ TEST(Conv, DepthwiseConv2DNoBias) { te::Placeholder input("input", te::kFloat, {N, C, H, W}); te::Placeholder weight("weight", te::kFloat, {K, CperG, R, S}); - te::Tensor* output = te::conv2d_depthwise( + te::Tensor output = te::conv2d_depthwise( input.handle(), weight.handle(), kStride, kPad, kGroups); te::LoopNest loop({output}); @@ -96,7 +96,7 @@ TEST(Conv, DepthwiseConv2DDynamicShapes) { te::Placeholder input("input", te::kFloat, {N_var, C_var, H_var, W_var}); te::Placeholder weight( "weight", te::kFloat, {K_var, CperG_var, R_var, S_var}); - te::Tensor* output = te::conv2d_depthwise( + te::Tensor output = te::conv2d_depthwise( input.handle(), weight.handle(), N_var, @@ -195,7 +195,7 @@ TEST(Conv, Conv2D) { te::Placeholder inputB(te::BufHandle("input", {N, C, H, W}, te::kFloat)); te::Placeholder filterB(te::BufHandle("filter", {K, C, R, S}, te::kFloat)); - te::Tensor* conv = te::Reduce( + te::Tensor conv = te::Reduce( "conv", {{N, "n"}, {K, "k"}, {OH, "oh"}, {OW, "ow"}}, te::Sum(), diff --git a/test/cpp/tensorexpr/test_cuda.cpp b/test/cpp/tensorexpr/test_cuda.cpp index e36e17a..ed5c070 100644 --- a/test/cpp/tensorexpr/test_cuda.cpp +++ b/test/cpp/tensorexpr/test_cuda.cpp @@ -34,7 +34,7 @@ static void testCudaTestVectorAdd01_impl() { Dtype dtype = ToDtype(); Placeholder a_buf("a", dtype, {num_iter, block_count, block_size}); Placeholder b_buf("b", dtype, {num_iter, block_count, block_size}); - Tensor* c = Compute( + Tensor c = Compute( "c", { {num_iter, "n"}, @@ -99,7 +99,7 @@ TEST(Cuda, Sigmoid_CUDA) { const int block_size = 128; Dtype dtype = ToDtype(); Placeholder a_buf("a", dtype, {num_iter, block_count, block_size}); - Tensor* c = Compute( + 
Tensor c = Compute( "c", { {num_iter, "n"}, @@ -165,7 +165,7 @@ static void testCudaTestVectorAdd02_impl(int N, int block_size) { KernelScope kernel_scope; Placeholder a_buf("a", kFloat, {N}); Placeholder b_buf("b", kFloat, {N}); - Tensor* c = Compute( + Tensor c = Compute( "c", { {N, "N"}, @@ -225,7 +225,7 @@ TEST(Cuda, HalfCast_CUDA) { KernelScope ks; auto half = ToDtype(); Placeholder a("a", half, {4}); - Tensor* b = Compute("b", {{4, "n"}}, [&](const VarHandle& i) { + Tensor b = Compute("b", {{4, "n"}}, [&](const VarHandle& i) { return Cast::make(kFloat, a.load(i)); }); @@ -267,7 +267,7 @@ TEST(Cuda, DynamicShape2D_CUDA) { VarHandle n("n", kInt); Placeholder a(BufHandle("a", {m, n}, kFloat)); Placeholder b(BufHandle("b", {m, n}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "c", {{m, "m"}, {n, "n"}}, [&](const VarHandle& i, const VarHandle& j) { return a.load(i, j) + b.load(i, j); }); @@ -328,7 +328,7 @@ TEST(Cuda, TestRand01_CUDA) { const int num_iter = 3; const int block_count = 16; const int block_size = 128; - Tensor* c = Compute( + Tensor c = Compute( "c", { {num_iter, "n"}, @@ -387,7 +387,7 @@ TEST(Cuda, DynamicShapeSplit_CUDA) { constexpr int N = 4096; VarHandle n("n", kInt); Placeholder a(BufHandle("a", {n}, kFloat)); - Tensor* b = Compute( + Tensor b = Compute( "b", {{n, "n"}}, [&](const VarHandle& i) { return a.load(i) * 2.0f; }); LoopNest l({b}); ForPtr inner; @@ -928,16 +928,16 @@ TEST(Cuda, HalfSupport_CUDA) { KernelScope ks; auto half = ToDtype(); Placeholder a("a", half, {4}); - Tensor* b = Compute("b", {{4, "n"}}, [&](const VarHandle& i) { + Tensor b = Compute("b", {{4, "n"}}, [&](const VarHandle& i) { return Cast::make(half, ExprHandle(2.0f) * a.load(i)); }); - Tensor* c = Compute("c", {{4, "n"}}, [&](const VarHandle& i) { - return Cast::make(kFloat, Cast::make(half, ExprHandle(42)) + b->load(i)); + Tensor c = Compute("c", {{4, "n"}}, [&](const VarHandle& i) { + return Cast::make(kFloat, Cast::make(half, ExprHandle(42)) + b.load(i)); }); - Tensor* d = Compute("d", {{4, "n"}}, [&](const VarHandle& i) { - return Cast::make(half, c->load(i)); + Tensor d = Compute("d", {{4, "n"}}, [&](const VarHandle& i) { + return Cast::make(half, c.load(i)); }); LoopNest l({b, c, d}); @@ -986,7 +986,7 @@ TEST(Cuda, HalfPropagation_CUDA) { KernelScope kernel_scope; auto half = ToDtype(); Placeholder a("a", half, {4}); - Tensor* relu = Compute("relu", {{4, "n"}}, [&](const VarHandle& i) { + Tensor relu = Compute("relu", {{4, "n"}}, [&](const VarHandle& i) { return Max::make(a.load(i), ExprHandle(alloc(0)), true); }); @@ -1036,7 +1036,7 @@ TEST(Cuda, UnusedHalfArgument_CUDA) { Placeholder a("a", kFloat, {4}); auto half = ToDtype(); Placeholder b("b", half, {4}); - Tensor* relu = Compute("relu", {{4, "n"}}, [&](const VarHandle& i) { + Tensor relu = Compute("relu", {{4, "n"}}, [&](const VarHandle& i) { return Max::make(a.load(i), ExprHandle(alloc(0)), true); }); @@ -1168,10 +1168,10 @@ TEST(Cuda, MaskBlockDim_CUDA) { int B_SIZE = 50; Placeholder a_buf("a", kFloat, {A_SIZE}); Placeholder b_buf("b", kFloat, {B_SIZE}); - Tensor* c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) { + Tensor c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) { return a_buf.load(i) + 10; }); - Tensor* d = Compute("d", {{B_SIZE, "i"}}, [&](const VarHandle& i) { + Tensor d = Compute("d", {{B_SIZE, "i"}}, [&](const VarHandle& i) { return a_buf.load(i) + b_buf.load(i); }); @@ -1261,10 +1261,10 @@ TEST(Cuda, MaskThreadDim_CUDA) { int B_SIZE = 100; Placeholder a_buf("a", kFloat, {A_SIZE}); 
Placeholder b_buf("b", kFloat, {B_SIZE}); - Tensor* c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) { + Tensor c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) { return a_buf.load(i) + 10; }); - Tensor* d = Compute("d", {{B_SIZE, "i"}}, [&](const VarHandle& i) { + Tensor d = Compute("d", {{B_SIZE, "i"}}, [&](const VarHandle& i) { return a_buf.load(i / 2) + b_buf.load(i); }); @@ -1356,10 +1356,10 @@ TEST(Cuda, MaskMultiBlockDim_CUDA) { int B_SIZE = 50; Placeholder a_buf("a", kFloat, {A_SIZE}); Placeholder b_buf("b", kFloat, {B_SIZE}); - Tensor* c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) { + Tensor c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) { return a_buf.load(i) + 10; }); - Tensor* d = Compute("d", {{B_SIZE, "i"}}, [&](const VarHandle& i) { + Tensor d = Compute("d", {{B_SIZE, "i"}}, [&](const VarHandle& i) { return a_buf.load(i) + b_buf.load(i); }); @@ -1450,10 +1450,10 @@ TEST(Cuda, MaskBlockAndThreadDim_CUDA) { int B_SIZE = 50; Placeholder a_buf("a", kFloat, {A_SIZE}); Placeholder b_buf("b", kFloat, {B_SIZE}); - Tensor* c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) { + Tensor c = Compute("c", {{A_SIZE, "i"}}, [&](const VarHandle& i) { return a_buf.load(i) + 10; }); - Tensor* d = Compute("d", {{B_SIZE, "i"}}, [&](const VarHandle& i) { + Tensor d = Compute("d", {{B_SIZE, "i"}}, [&](const VarHandle& i) { return a_buf.load(i) + b_buf.load(i); }); @@ -1543,17 +1543,17 @@ TEST(Cuda, MaskMultiDim_CUDA) { int B_SIZE = 50; Placeholder a_buf("a", kFloat, {OUTER_SIZE, A_SIZE}); Placeholder b_buf("b", kFloat, {OUTER_SIZE, B_SIZE}); - Tensor* c = Compute( + Tensor c = Compute( "C", {{OUTER_SIZE, "i"}, {A_SIZE, "j"}}, [&](const VarHandle& i, const VarHandle& j) { return ExprHandle(2) * a_buf.load(i, j); }); - Tensor* d = Compute( + Tensor d = Compute( "D", {{OUTER_SIZE, "i"}, {B_SIZE, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return c->load(i, j * 2) + b_buf.load(i, j); + return c.load(i, j * 2) + b_buf.load(i, j); }); LoopNest l({c, d}); @@ -1673,17 +1673,17 @@ TEST(Cuda, MaskMultiDimSymbolic_CUDA) { VarHandle B_SIZE("B_SIZE", kInt); Placeholder a_buf("a", kFloat, {OUTER_SIZE, A_SIZE}); Placeholder b_buf("b", kFloat, {OUTER_SIZE, B_SIZE}); - Tensor* c = Compute( + Tensor c = Compute( "C", {{OUTER_SIZE, "i"}, {A_SIZE, "j"}}, [&](const VarHandle& i, const VarHandle& j) { return ExprHandle(2) * a_buf.load(i, j); }); - Tensor* d = Compute( + Tensor d = Compute( "D", {{OUTER_SIZE, "i"}, {B_SIZE, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return c->load(i, j * 2) + b_buf.load(i, j); + return c.load(i, j * 2) + b_buf.load(i, j); }); LoopNest l({c, d}); @@ -2087,17 +2087,17 @@ TEST(Cuda, MaskMultiDimMultiAxis_CUDA) { int B_SIZE = 15; Placeholder a_buf("a", kFloat, {OUTER_SIZE, A_SIZE}); Placeholder b_buf("b", kFloat, {OUTER_SIZE, B_SIZE}); - Tensor* c = Compute( + Tensor c = Compute( "C", {{OUTER_SIZE, "i"}, {A_SIZE, "j"}}, [&](const VarHandle& i, const VarHandle& j) { return ExprHandle(2) * a_buf.load(i, j); }); - Tensor* d = Compute( + Tensor d = Compute( "D", {{OUTER_SIZE, "i"}, {B_SIZE, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return c->load(i, j * 2) + b_buf.load(i, j); + return c.load(i, j * 2) + b_buf.load(i, j); }); LoopNest l({c, d}); @@ -2218,17 +2218,17 @@ TEST(Cuda, MaskMultiDimMultiLevel_CUDA) { int B_SIZE = 15; Placeholder a_buf("a", kFloat, {OUTER_A_SIZE, A_SIZE}); Placeholder b_buf("b", kFloat, {OUTER_B_SIZE, B_SIZE}); - Tensor* c = Compute( + Tensor c = Compute( "C", {{OUTER_A_SIZE, "i"}, 
{A_SIZE, "j"}}, [&](const VarHandle& i, const VarHandle& j) { return ExprHandle(2) * a_buf.load(i, j); }); - Tensor* d = Compute( + Tensor d = Compute( "D", {{OUTER_B_SIZE, "i"}, {B_SIZE, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return c->load(i, j * 2) + b_buf.load(i, j); + return c.load(i, j * 2) + b_buf.load(i, j); }); LoopNest l({c, d}); diff --git a/test/cpp/tensorexpr/test_external_calls.cpp b/test/cpp/tensorexpr/test_external_calls.cpp index 24ddfbf..a170e53 100644 --- a/test/cpp/tensorexpr/test_external_calls.cpp +++ b/test/cpp/tensorexpr/test_external_calls.cpp @@ -31,7 +31,7 @@ TEST(ExternalCall, Conv2d_float) { int64_t dilation = 1; int64_t groups = 1; - Tensor* Result = new Tensor( + Tensor Result = Tensor( ResultBuf.node(), ExternalCall::make( ResultBuf, @@ -95,7 +95,7 @@ TEST(ExternalCall, Conv2d_int) { int64_t dilation = 1; int64_t groups = 1; - Tensor* Result = new Tensor( + Tensor Result = Tensor( ResultBuf.node(), ExternalCall::make( ResultBuf, @@ -153,7 +153,7 @@ TEST(ExternalCall, Conv2d_nobias_noargs) { Placeholder Weight("Weight", kFloat, {16, 16, 1, 1}); BufHandle ResultBuf("Result", {1, 16, 112, 112}, kFloat); - Tensor* Result = new Tensor( + Tensor Result = Tensor( ResultBuf.node(), ExternalCall::make( ResultBuf, @@ -203,7 +203,7 @@ TEST(ExternalCall, Addmm_float) { int64_t beta = 2; int64_t alpha = 2; - Tensor* Result = new Tensor( + Tensor Result = Tensor( ResultBuf.node(), ExternalCall::make( ResultBuf, @@ -283,7 +283,7 @@ TEST(ExternalCall, Prepacked_Linear_float) { weight, bias, c10::optional(), c10::optional()); Placeholder DummyPrepacked("DummyPrepacked", kFloat, {1}); - Tensor* Result = new Tensor( + Tensor Result = Tensor( ResultBuf.node(), ExternalCall::make( ResultBuf, @@ -370,7 +370,7 @@ TEST(ExternalCall, Prepacked_Conv2d_float) { c10::optional()); Placeholder DummyPrepacked("DummyPrepacked", kFloat, {1}); - Tensor* Result = new Tensor( + Tensor Result = Tensor( ResultBuf.node(), ExternalCall::make( ResultBuf, @@ -431,7 +431,7 @@ TEST(ExternalCall, BinaryFloat) { Placeholder B("", kFloat, toExprHandleVec(bShape)); BufHandle ResultBuf("Result", toExprHandleVec(resShape), kFloat); - Tensor* Result = new Tensor( + Tensor Result = Tensor( ResultBuf.node(), ExternalCall::make( ResultBuf, @@ -516,7 +516,7 @@ TEST(ExternalCall, UnaryFloat) { Placeholder A("A", kFloat, toExprHandleVec(aShape)); BufHandle ResultBuf("Result", toExprHandleVec(resShape), kFloat); - Tensor* Result = new Tensor( + Tensor Result = Tensor( ResultBuf.node(), ExternalCall::make( ResultBuf, externCallName, {BufHandle(A.data())}, externCallArgs)); @@ -566,14 +566,14 @@ TEST(ExternalCall, ComputeInterop) { BufHandle ConvResultBuf("ConvResult", {1, 16, 32, 32}, kFloat); BufHandle MatmulResultBuf("MatmulResult", {1, 16, 32, 32}, kFloat); - Tensor* Input = Compute( + Tensor Input = Compute( "Input", {{1, "n"}, {16, "c"}, {32, "h"}, {32, "w"}}, [&](const VarHandle& n, const VarHandle& c, const VarHandle& h, const VarHandle& w) { return FloatImm::make(5.0f); }); - Tensor* Weight = Compute( + Tensor Weight = Compute( "Weight", {{16, "n"}, {16, "c"}, {1, "kh"}, {1, "kw"}}, [&](const VarHandle& n, @@ -581,28 +581,28 @@ TEST(ExternalCall, ComputeInterop) { const VarHandle& h, const VarHandle& w) { return FloatImm::make(6.0f); }); - Tensor* ConvResult = new Tensor( + Tensor ConvResult = Tensor( ConvResultBuf.node(), ExternalCall::make( ConvResultBuf, "nnc_aten_conv2d", - {BufHandle(Input->buf()), BufHandle(Weight->buf())}, + {BufHandle(Input.buf()), BufHandle(Weight.buf())}, {})); - 
Tensor* MatmulResult = new Tensor( + Tensor MatmulResult = Tensor( MatmulResultBuf.node(), ExternalCall::make( MatmulResultBuf, "nnc_aten_matmul", - {BufHandle(ConvResult->buf()), BufHandle(ConvResult->buf())}, + {BufHandle(ConvResult.buf()), BufHandle(ConvResult.buf())}, {})); - Tensor* Result = Compute( + Tensor Result = Compute( "Result", {{1, "n"}, {16, "c"}, {32, "h"}, {32, "w"}}, [&](const VarHandle& n, const VarHandle& c, const VarHandle& h, const VarHandle& w) { - return ConvResult->load(n, c, h, w) + MatmulResult->load(n, c, h, w); + return ConvResult.load(n, c, h, w) + MatmulResult.load(n, c, h, w); }); LoopNest l({Input, Weight, ConvResult, MatmulResult, Result}); @@ -658,31 +658,31 @@ TEST(ExternalCall, Inlining) { BufHandle MatmulResultBuf("MatmulResult", {8, 8}, kFloat); - Tensor* A = Compute( + Tensor A = Compute( "A", {{8, "i"}, {8, "j"}}, [&](const VarHandle& i, const VarHandle& j) { return FloatImm::make(5.0f); }); - Tensor* B = Compute( + Tensor B = Compute( "B", {{8, "i"}, {8, "j"}}, [&](const VarHandle& i, const VarHandle& j) { return FloatImm::make(4.0f); }); - Tensor* MatmulResult = new Tensor( + Tensor MatmulResult = Tensor( MatmulResultBuf.node(), ExternalCall::make( MatmulResultBuf, "nnc_aten_matmul", - {BufHandle(A->buf()), BufHandle(B->buf())}, + {BufHandle(A.buf()), BufHandle(B.buf())}, {})); - Tensor* Result = Compute( + Tensor Result = Compute( "Result", {{8, "i"}, {8, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return MatmulResult->load(i, j) + FloatImm::make(3.0f); + return MatmulResult.load(i, j) + FloatImm::make(3.0f); }); StmtPtr root_stmt = alloc(std::vector( - {A->stmt(), B->stmt(), MatmulResult->stmt(), Result->stmt()})); - LoopNest l(root_stmt, {Result->buf()}); + {A.stmt(), B.stmt(), MatmulResult.stmt(), Result.stmt()})); + LoopNest l(root_stmt, {Result.buf()}); // Inlining should not inline anything here since all Bufs are either // defined or used in ExternalCalls diff --git a/test/cpp/tensorexpr/test_ir_printer.cpp b/test/cpp/tensorexpr/test_ir_printer.cpp index 76d9247..e11ba06 100644 --- a/test/cpp/tensorexpr/test_ir_printer.cpp +++ b/test/cpp/tensorexpr/test_ir_printer.cpp @@ -56,30 +56,30 @@ TEST(IRPrinter, FunctionName) { int M = 4; int N = 20; - Tensor* producer = Compute( + Tensor producer = Compute( "producer", {{M, "m"}, {N, "n"}}, [&](const ExprHandle& m, const ExprHandle& n) { return m * n; }); - Tensor* chunk_0 = Compute( + Tensor chunk_0 = Compute( "chunk", {{M, "m"}, {N / 2, "n"}}, [&](const ExprHandle& m, const ExprHandle& n) { - return producer->load(m, n); + return producer.load(m, n); }); - Tensor* chunk_1 = Compute( + Tensor chunk_1 = Compute( "chunk", {{M, "m"}, {N / 2, "n"}}, [&](const ExprHandle& m, const ExprHandle& n) { - return producer->load(m, n + ExprHandle(N / 2)); + return producer.load(m, n + ExprHandle(N / 2)); }); - Tensor* consumer = Compute( + Tensor consumer = Compute( "consumer", {{M, "i"}, {N / 2, "j"}}, [&](const ExprHandle& i, const ExprHandle& j) { - return i * chunk_1->load(i, j); + return i * chunk_1.load(i, j); }); LoopNest l({chunk_0, chunk_1, consumer}); diff --git a/test/cpp/tensorexpr/test_kernel.cpp b/test/cpp/tensorexpr/test_kernel.cpp index 8f36f54..765522e 100644 --- a/test/cpp/tensorexpr/test_kernel.cpp +++ b/test/cpp/tensorexpr/test_kernel.cpp @@ -1329,7 +1329,7 @@ TEST_F(Kernel, CodegenInspection) { #endif } -Tensor* lowerNanToNum( +Tensor lowerNanToNum( const std::vector& inputs, const std::vector& outputShape, const c10::optional& outputType, diff --git 
a/test/cpp/tensorexpr/test_llvm.cpp b/test/cpp/tensorexpr/test_llvm.cpp index 75e6a06..6081403 100644 --- a/test/cpp/tensorexpr/test_llvm.cpp +++ b/test/cpp/tensorexpr/test_llvm.cpp @@ -593,10 +593,10 @@ TEST(LLVM, VectorizerLoadStoreTest) { KernelScope kernel_scope; Placeholder a(BufHandle("A", {1}, kInt)); - Tensor* c = + Tensor c = Compute("c", {{4, "i"}}, [&](const VarHandle& i) { return a.load(i); }); - Placeholder c_buf(BufHandle(c->buf())); + Placeholder c_buf(BufHandle(c.buf())); LoopNest l({c}); StmtPtr s = l.root_stmt(); ASSERT_TRUE(LoopNest::vectorize(to(to(s)->front()))); @@ -616,11 +616,11 @@ TEST(LLVM, VectorizeBitCast) { KernelScope kernel_scope; Placeholder a(BufHandle("A", {128}, kInt)); - Tensor* c = Compute("c", {{128, "i"}}, [&](const VarHandle& i) { + Tensor c = Compute("c", {{128, "i"}}, [&](const VarHandle& i) { return bitcast(a.load(i)); }); - Placeholder c_buf(BufHandle(c->buf())); + Placeholder c_buf(BufHandle(c.buf())); LoopNest l({c}); StmtPtr s = l.root_stmt(); ASSERT_TRUE(LoopNest::vectorize(to(to(s)->front()))); @@ -1217,12 +1217,12 @@ TEST(LLVM, StoreFloat) { TEST(LLVM, SimpleMath01) { KernelScope kernel_scope; const int N = 1024; - Tensor* tensor = Compute("f", {{N, "i"}}, [](const VarHandle& i) { + Tensor tensor = Compute("f", {{N, "i"}}, [](const VarHandle& i) { return cast(i * i + 1); }); LoopNest l({tensor}); StmtPtr stmt = l.root_stmt(); - Placeholder f_buf(BufHandle(tensor->buf())); + Placeholder f_buf(BufHandle(tensor.buf())); LLVMCodeGen cg(stmt, {f_buf}); PaddedBuffer f_v(N, "f_v"); @@ -1241,11 +1241,11 @@ TEST(LLVM, ComputeMul) { const int N = 1024; Placeholder a(BufHandle("a", {N}, kFloat)); Placeholder b(BufHandle("b", {N}, kFloat)); - Tensor* c = Compute("c", {{N, "i"}}, [&](const VarHandle& i) { + Tensor c = Compute("c", {{N, "i"}}, [&](const VarHandle& i) { return a.load(i) * b.load(i); }); - Placeholder c_buf(BufHandle(c->buf())); + Placeholder c_buf(BufHandle(c.buf())); LoopNest l({c}); StmtPtr s = l.root_stmt(); @@ -1265,12 +1265,12 @@ TEST(LLVM, BroadcastAdd) { const int N = 1024; Placeholder a(BufHandle("a", {M, N}, kFloat)); Placeholder b(BufHandle("b", {N}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "c", {{M, "i"}, {N, "j"}}, [&](const VarHandle& i, const VarHandle& j) { return a.load(i, j) + b.load(j); }); - Placeholder c_buf(BufHandle(c->buf())); + Placeholder c_buf(BufHandle(c.buf())); LoopNest l({c}); l.prepareForCodegen(); StmtPtr s = l.root_stmt(); @@ -1372,7 +1372,7 @@ TEST(LLVM, TensorDynamicShapeAdd) { VarHandle n("n", kInt); Placeholder a(BufHandle("a", {n}, kFloat)); Placeholder b(BufHandle("b", {n}, kFloat)); - Tensor* c = Compute("c", {{n, "n"}}, [&](const VarHandle& i) { + Tensor c = Compute("c", {{n, "n"}}, [&](const VarHandle& i) { return a.load(i) + b.load(i); }); LoopNest l({c}); @@ -1396,7 +1396,7 @@ TEST(LLVM, DynamicShape2D) { VarHandle n("n", kInt); Placeholder a(BufHandle("a", {m, n}, kFloat)); Placeholder b(BufHandle("b", {m, n}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "c", {{m, "m"}, {n, "n"}}, [&](const VarHandle& i, const VarHandle& j) { return a.load(i, j) + b.load(i, j); }); @@ -1428,7 +1428,7 @@ TEST(LLVM, EliminatedStmt) { KernelScope kernel_scope; Placeholder a(BufHandle("a", {1}, kFloat)); - Tensor* c = Compute("c", {{0, "m"}}, [&](const VarHandle& m) { return m; }); + Tensor c = Compute("c", {{0, "m"}}, [&](const VarHandle& m) { return m; }); LoopNest l({c}); l.prepareForCodegen(); @@ -1452,7 +1452,7 @@ TEST(LLVM, SimpleReduction) { // TODO: why doesn't implicit vector work? 
std::vector axis = {DimArg(1)}; std::vector reduce_axis = {DimArg(M), DimArg(N)}; - Tensor* b = Reduce("sum", axis, Sum(), a, reduce_axis); + Tensor b = Reduce("sum", axis, Sum(), a, reduce_axis); LoopNest loop({b}); loop.prepareForCodegen(); @@ -1491,7 +1491,7 @@ TEST(LLVM, RFactorReduction) { // TODO: why doesn't implicit vector work? std::vector axis = {DimArg(1)}; std::vector reduce_axis = {DimArg(M), DimArg(N)}; - Tensor* b = Reduce("sum", axis, Sum(), a, reduce_axis); + Tensor b = Reduce("sum", axis, Sum(), a, reduce_axis); LoopNest loop({b}); std::vector loops = loop.getLoopStmtsFor(b); @@ -1502,7 +1502,7 @@ TEST(LLVM, RFactorReduction) { loops = loop.getLoopStmtsFor(b); loop_m = loops.at(2); loop_n = loops.at(1); - auto b_body = loop.getAllWritesToBuf(b->buf())[1]; + auto b_body = loop.getAllWritesToBuf(b.buf())[1]; ASSERT_TRUE(loop.rfactor(b_body, loop_n)); loop.prepareForCodegen(); @@ -1538,13 +1538,13 @@ TEST(LLVM, RFactorVectorizedReduction) { Placeholder a("a", kFloat, {1, M, N}); - Tensor* b = Reduce("sum", {{1, "K"}}, Sum(), a, {{M, "M"}, {N, "N"}}); + Tensor b = Reduce("sum", {{1, "K"}}, Sum(), a, {{M, "M"}, {N, "N"}}); LoopNest loopnest({b}); std::vector loops = loopnest.getLoopStmtsFor(b); // Reorder n and m loops loopnest.reorderAxis(loops.at(1), loops.at(2)); - auto b_body = loopnest.getAllWritesToBuf(b->buf()).at(1); - auto all_loops = loopnest.getAllLoopNestsWritingToBuf(b->buf()); + auto b_body = loopnest.getAllWritesToBuf(b.buf()).at(1); + auto all_loops = loopnest.getAllLoopNestsWritingToBuf(b.buf()); ASSERT_TRUE(all_loops.size() == 2 && all_loops[1].size() == 3); ASSERT_TRUE(loopnest.rfactor(b_body, all_loops[1][1])); auto distributed_loops = loopnest.distributeLoop(all_loops[1][1]); @@ -1585,7 +1585,7 @@ TEST(LLVM, SimpleParallel) { KernelScope kernel_scope; const int M = 4; const int N = 6; - Tensor* f = Compute( + Tensor f = Compute( "f", {{M, "m"}, {N, "n"}}, [](const VarHandle& m, const VarHandle& n) { return cast(m + n); }); @@ -1626,23 +1626,23 @@ TEST(LLVM, CompositeParallel) { KernelScope kernel_scope; int M = 5; int N = 7; - Tensor* t1 = + Tensor t1 = Compute("t1", {{M, "M"}}, [](const VarHandle& m) { return m + 1.f; }); - Tensor* t2 = + Tensor t2 = Compute("t2", {{N, "N"}}, [](const VarHandle& n) { return n + 2.f; }); - Tensor* t3 = Compute( + Tensor t3 = Compute( "t3", {{M, "M"}, {N, "N"}}, [=](const VarHandle& m, const VarHandle& n) { - return t1->load(m) * t2->load(n); + return t1.load(m) * t2.load(n); }); - Tensor* t4 = Compute( + Tensor t4 = Compute( "t4", {{M, "M"}, {N, "N"}}, [=](const VarHandle& m, const VarHandle& n) { - return t3->load(m, n) + m + n; + return t3.load(m, n) + m + n; }); - LoopNest loop_nest(std::vector({t4}), {t1, t2, t3, t4}); + LoopNest loop_nest({t4}, {t1, t2, t3, t4}); std::vector loop_list; { auto const& loops = loop_nest.getLoopStmtsFor(t1); @@ -1695,7 +1695,7 @@ TEST(LLVM, VectorizedGEMM) { Placeholder AP(BufHandle("A", {M, K}, kFloat)); Placeholder BP(BufHandle("B", {K, N}, kFloat)); - Tensor* CT = Reduce( + Tensor CT = Reduce( "gemm", {{M, "M"}, {N, "N"}}, Sum(), @@ -1776,7 +1776,7 @@ TEST(LLVM, CallRaw) { VarHandle N("N", kInt); Placeholder a(BufHandle("a", {M, N}, kFloat)); Placeholder b(BufHandle("b", {N}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "c", {{M, "i"}, {N, "j"}}, [&](const VarHandle& i, const VarHandle& j) { return a.load(i, j) + b.load(j); }); @@ -1793,7 +1793,7 @@ TEST(LLVM, CallRaw) { std::vector cv(M * N_value, 0); std::vector args({av.data(), bv.data(), cv.data(), &N_value}); - 
LLVMCodeGen cg(s, {a, b, BufHandle(c->buf()), N}); + LLVMCodeGen cg(s, {a, b, BufHandle(c.buf()), N}); cg.call_raw(args); for (int i = 0; i < M; i++) { @@ -1802,7 +1802,7 @@ TEST(LLVM, CallRaw) { } } - SimpleIREvaluator eval(s, {a, b, BufHandle(c->buf()), N}); + SimpleIREvaluator eval(s, {a, b, BufHandle(c.buf()), N}); eval.call_raw(args); for (int i = 0; i < M; i++) { @@ -1818,7 +1818,7 @@ TEST(LLVM, CustomTarget) { Placeholder a("a", kFloat, {M}); Placeholder b("b", kFloat, {M}); Placeholder c("c", kFloat, {M}); - Tensor* d = Compute("d", {{M, "m"}}, [&](const VarHandle& m) { + Tensor d = Compute("d", {{M, "m"}}, [&](const VarHandle& m) { return a.load(m) * b.load(m) + c.load(m); }); LoopNest nest({d}); diff --git a/test/cpp/tensorexpr/test_loopnest.cpp b/test/cpp/tensorexpr/test_loopnest.cpp index c80dd5f..7c3eefa 100644 --- a/test/cpp/tensorexpr/test_loopnest.cpp +++ b/test/cpp/tensorexpr/test_loopnest.cpp @@ -42,13 +42,12 @@ void checkExprIR(const ExprHandle& e, const std::string& pattern) { TEST(LoopNest, ExprSimple01) { KernelScope kernel_scope; - Tensor* tensor = Compute( + Tensor tensor = Compute( "f", {{16, "X"}, {5, "y"}}, [](const VarHandle& x, const VarHandle& y) { return ExprHandle(1.0f) + cast(x) * x + cast(y) * y; }); LoopNest l({tensor}); - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::splitWithTail(loops[0], 2); LoopNest::splitWithTail(loops[0], 2); @@ -56,7 +55,7 @@ TEST(LoopNest, ExprSimple01) { TEST(LoopNest, ExprLower01) { KernelScope kernel_scope; - Tensor* tensor = Compute( + Tensor tensor = Compute( "f", {{16, "x"}, {5, "y"}}, [](const VarHandle& x, const VarHandle& y) { return ExprHandle(1.0f) + cast(x) * x + cast(y) * y; }); @@ -73,10 +72,9 @@ TEST(LoopNest, ExprSimple02) { auto func = [](const ExprHandle& x, const ExprHandle& y) { return ExprHandle(1.0f) + cast(x) * x + cast(y) * y; }; - Tensor* tensor = Compute("f", {{26, "x"}, {5, "y"}}, func); + Tensor tensor = Compute("f", {{26, "x"}, {5, "y"}}, func); LoopNest l({tensor}); - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::splitWithTail(loops[0], 4); @@ -168,14 +166,13 @@ TEST(LoopNest, ExprSliceHeadWithLoopOptions) { auto func = [](const ExprHandle& x) { return ExprHandle(1.0f) + cast(x); }; - Tensor* tensor = Compute("f", {{10, "x"}}, func); + Tensor tensor = Compute("f", {{10, "x"}}, func); LoopNest l({tensor}); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr head; // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr tail; - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); loops[0]->set_gpu_block_index(LoopOptions::IDX_Y); LoopNest::sliceHead(loops[0], 2, &head, &tail); @@ -193,14 +190,13 @@ TEST(LoopNest, ExprSliceTailWithLoopOptions) { auto func = [](const ExprHandle& x) { return ExprHandle(1.0f) + cast(x); }; - Tensor* tensor = Compute("f", {{10, "x"}}, func); + Tensor tensor = Compute("f", {{10, "x"}}, func); LoopNest l({tensor}); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr head; // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr tail; - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::sliceTail(loops[0], 4, &head, &tail); // 
NOLINTNEXTLINE(cppcoreguidelines-init-variables) @@ -227,14 +223,13 @@ TEST(LoopNest, ExprSliceHeadWhenFactorEqualsSize) { auto func = [](const ExprHandle& x) { return ExprHandle(1.0f) + cast(x); }; - Tensor* tensor = Compute("f", {{10, "x"}}, func); + Tensor tensor = Compute("f", {{10, "x"}}, func); LoopNest l({tensor}); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr head; // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr tail; - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::sliceHead(loops[0], 10, &head, &tail); ASSERT_EQ(head, loops[0]); @@ -249,14 +244,13 @@ TEST(LoopNest, ExprSliceHeadWhenFactorLargerThanSize) { auto func = [](const ExprHandle& x) { return ExprHandle(1.0f) + cast(x); }; - Tensor* tensor = Compute("f", {{10, "x"}}, func); + Tensor tensor = Compute("f", {{10, "x"}}, func); LoopNest l({tensor}); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr head; // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr tail; - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::sliceHead(loops[0], 100, &head, &tail); ASSERT_EQ(head, loops[0]); @@ -271,14 +265,13 @@ TEST(LoopNest, ExprSliceHead) { auto func = [](const ExprHandle& x) { return ExprHandle(1.0f) + cast(x); }; - Tensor* tensor = Compute("f", {{10, "x"}}, func); + Tensor tensor = Compute("f", {{10, "x"}}, func); LoopNest l({tensor}); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr head; // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr tail; - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::sliceHead(loops[0], 4, &head, &tail); ASSERT_NE(head, nullptr); @@ -295,10 +288,9 @@ TEST(LoopNest, ExprSliceHeadWithNonZeroStart) { auto func = [](const ExprHandle& x) { return ExprHandle(1.0f) + cast(x); }; - Tensor* tensor = Compute("f", {{10, "x"}}, func); + Tensor tensor = Compute("f", {{10, "x"}}, func); LoopNest l({tensor}); - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr head; @@ -323,14 +315,13 @@ TEST(LoopNest, ExprSliceTailWhenFactorEqualsSize) { auto func = [](const ExprHandle& x) { return ExprHandle(1.0f) + cast(x); }; - Tensor* tensor = Compute("f", {{10, "x"}}, func); + Tensor tensor = Compute("f", {{10, "x"}}, func); LoopNest l({tensor}); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr head; // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr tail; - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::sliceTail(loops[0], 10, &head, &tail); ASSERT_EQ(head, nullptr); @@ -347,14 +338,13 @@ TEST(LoopNest, ExprSliceTailWhenFactorLargerThanSize) { auto func = [](const ExprHandle& x) { return ExprHandle(1.0f) + cast(x); }; - Tensor* tensor = Compute("f", {{10, "x"}}, func); + Tensor tensor = Compute("f", {{10, "x"}}, func); LoopNest l({tensor}); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr head; // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr tail; - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = 
l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::sliceTail(loops[0], 100, &head, &tail); ASSERT_EQ(head, nullptr); @@ -369,14 +359,13 @@ TEST(LoopNest, ExprSliceTail) { auto func = [](const ExprHandle& x) { return ExprHandle(1.0f) + cast(x); }; - Tensor* tensor = Compute("f", {{10, "x"}}, func); + Tensor tensor = Compute("f", {{10, "x"}}, func); LoopNest l({tensor}); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr head; // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr tail; - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::sliceTail(loops[0], 4, &head, &tail); ASSERT_NE(head, nullptr); @@ -396,15 +385,14 @@ TEST(LoopNest, ExprSplitAndSlice) { auto func = [](const ExprHandle& x) { return ExprHandle(1.0f) + cast(x); }; - Tensor* tensor = Compute("f", {{100, "x"}}, func); + Tensor tensor = Compute("f", {{100, "x"}}, func); LoopNest l({tensor}); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr inner; // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr tail; - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); // outer: [0, 4) // inner: [0, 21) // tail: [84, 100) @@ -450,10 +438,9 @@ TEST(LoopNest, ExprSliceAndNormalize) { auto func = [](const ExprHandle& x) { return ExprHandle(1.0f) + cast(x); }; - Tensor* tensor = Compute("f", {{10, "x"}}, func); + Tensor tensor = Compute("f", {{10, "x"}}, func); LoopNest l({tensor}); - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr head; @@ -482,11 +469,11 @@ TEST(LoopNest, ExprSliceWithVariableDimension) { const std::vector>& expected_for_ranges) { KernelScope kernel_scope; VarHandle dim("dim", kInt); - Tensor* tensor = + Tensor tensor = Compute("f", {{dim, "x"}}, [](const ExprHandle& x) { return x; }); LoopNest l({tensor}); std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr head; @@ -521,10 +508,9 @@ TEST(LoopNest, ExprSplitWithTail) { auto func = [](const ExprHandle& x) { return ExprHandle(1.0f) + cast(x); }; - Tensor* tensor = Compute("f", {{199, "x"}}, func); + Tensor tensor = Compute("f", {{199, "x"}}, func); LoopNest l({tensor}); - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers) LoopNest::splitWithTail(loops[0], 17); // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers) @@ -552,10 +538,9 @@ TEST(LoopNest, ExprSplitWithTailNone) { auto func = [](const ExprHandle& x, const ExprHandle& y) { return ExprHandle(1.0f) + cast(x) * x + cast(y) * y; }; - Tensor* tensor = Compute("f", {{24, "x"}, {5, "y"}}, func); + Tensor tensor = Compute("f", {{24, "x"}, {5, "y"}}, func); LoopNest l({tensor}); - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::splitWithTail(loops[0], 4); StmtPtr stmt = l.root_stmt(); @@ -612,14 +597,13 @@ TEST(LoopNest, ExprSplitWithMask01) { const int N = 5; Placeholder a_buf("a", kFloat, {M, N}); Placeholder b_buf("b", kFloat, {M, N}); - 
Tensor* tensor = Compute( + Tensor tensor = Compute( "f", {{M, "m"}, {N, "n"}}, [&](const ExprHandle& m, const ExprHandle& n) { return a_buf.load(m, n) + b_buf.load(m, n) + 1.0f; }); LoopNest l({tensor}); - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::splitWithMask(loops[1], 4); StmtPtr stmt = l.root_stmt(); @@ -648,13 +632,12 @@ TEST(LoopNest, ExprSplitWithMaskRepeatedNoMask) { const int M = 64; Placeholder a_buf("a", kFloat, {M}); Placeholder b_buf("b", kFloat, {M}); - Tensor* tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) { + Tensor tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) { return a_buf.load(m) + b_buf.load(m) + 1.0f; }); LoopNest l({tensor}); - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::splitWithMask(loops[0], 4); LoopNest::splitWithMask(loops[0], 4); @@ -736,14 +719,13 @@ TEST(LoopNest, TileSimple) { const int M = 64, N = 64; Placeholder a_buf("a", kFloat, {M, N}); Placeholder b_buf("b", kFloat, {M, N}); - Tensor* tensor = Compute( + Tensor tensor = Compute( "f", {{M, "m"}, {N, "n"}}, [&](const ExprHandle& m, const ExprHandle& n) { return a_buf.load({m, n}) + b_buf.load({m, n}) + 1.0f; }); LoopNest l({tensor}); - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers) l.tile(loops[0], loops[1], 4, 8); @@ -783,14 +765,13 @@ TEST(LoopNest, TileWithTails) { const int M = 64, N = 64; Placeholder a_buf("a", kFloat, {M, N}); Placeholder b_buf("b", kFloat, {M, N}); - Tensor* tensor = Compute( + Tensor tensor = Compute( "f", {{M, "m"}, {N, "n"}}, [&](const ExprHandle& m, const ExprHandle& n) { return a_buf.load({m, n}) + b_buf.load({m, n}) + 1.0f; }); LoopNest l({tensor}); - std::vector loops = - l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers) l.tile(loops[0], loops[1], 5, 9); @@ -831,7 +812,7 @@ TEST(LoopNest, TileInMiddle) { const int M = 8, N = 8, L = 8, K = 8; Placeholder a_buf("a", kFloat, {M, N, L, K}); Placeholder b_buf("b", kFloat, {M, N, L, K}); - Tensor* tensor = Compute( + Tensor tensor = Compute( "f", {{M, "m"}, {N, "n"}, {L, "l"}, {K, "k"}}, [&](const ExprHandle& m, @@ -843,7 +824,7 @@ TEST(LoopNest, TileInMiddle) { LoopNest nest({tensor}); std::vector loops = - nest.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + nest.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); // NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers) nest.tile(loops[1], loops[2], 3, 3); @@ -891,7 +872,7 @@ TEST(LoopNest, SplitWithTailWithLoopOptions) { const int M = 21; Placeholder a_buf("a", kFloat, {M}); Placeholder b_buf("b", kFloat, {M}); - Tensor* tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) { + Tensor tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) { return a_buf.load(m) + b_buf.load(m) + 1.0f; }); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) @@ -922,7 +903,7 @@ TEST(LoopNest, SplitWithMaskWithLoopOptions) { const int M = 21; Placeholder a_buf("a", kFloat, {M}); Placeholder b_buf("b", kFloat, {M}); - Tensor* tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) { + Tensor tensor = Compute("f", {{M, "m"}}, [&](const ExprHandle& m) { 
return a_buf.load(m) + b_buf.load(m) + 1.0f; }); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) @@ -949,7 +930,7 @@ TEST(LoopNest, ScheduleBroadcastAddBuffer) { const int K = 6; Placeholder a_buf("a", kFloat, {M, N}); Placeholder b_buf("b", kFloat, {N, K}); - Tensor* c = Compute( + Tensor c = Compute( "broadcast_add", {{M, "m"}, {N, "n"}, {K, "k"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { @@ -998,20 +979,20 @@ TEST(LoopNest, ScheduleFunctionCall01) { const int K = 6; Placeholder a_buf("a", kFloat, {M, N}); Placeholder b_buf("b", kFloat, {N, K}); - Tensor* c = Compute( + Tensor c = Compute( "broadcast_add", {{M, "m"}, {N, "n"}, {K, "k"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { return a_buf.load(m, n) + b_buf.load(n, k); }); - Tensor* d = Compute( + Tensor d = Compute( "d", {{M, "m"}, {N, "n"}, {K, "k"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return c->load(m, n, k) + 1; + return c.load(m, n, k) + 1; }); - LoopNest l(std::vector({d}), {c, d}); + LoopNest l({d}, {c, d}); l.prepareForCodegen(); StmtPtr stmt = l.root_stmt(); std::ostringstream oss; @@ -1058,22 +1039,22 @@ TEST(LoopNest, ScheduleInlineSimple) { Placeholder c_buf("c", kFloat, {M, N}); Placeholder d_buf("d", kFloat, {M, K}); - Tensor* x = Compute( + Tensor x = Compute( "x", {{M, "m1"}, {N, "n1"}, {K, "k1"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { return a_buf.load(m, n) * b_buf.load(n, k); }); - Tensor* y = Compute( + Tensor y = Compute( "y", {{M, "m2"}, {N, "n2"}, {K, "k2"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return c_buf.load(m, n) * d_buf.load(m, k) + x->load(m, n, k); + return c_buf.load(m, n) * d_buf.load(m, k) + x.load(m, n, k); }); - LoopNest l1(std::vector({y}), {x, y}); + LoopNest l1({y}, {x, y}); LoopNest l2(l1); - l2.computeInline(x->buf()); + l2.computeInline(x.buf()); l1.prepareForCodegen(); l2.prepareForCodegen(); @@ -1139,31 +1120,31 @@ void InlineFunc01Helper(const std::vector& inline_order) { Placeholder c_buf("c", kFloat, {M, N}); Placeholder d_buf("d", kFloat, {M, K}); - Tensor* x = Compute( + Tensor x = Compute( "x", {{M, "m1"}, {N, "n1"}, {K, "k1"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { return a_buf.load(m, n) * b_buf.load(n, k); }); - Tensor* y = Compute( + Tensor y = Compute( "y", {{M, "m2"}, {N, "n2"}, {K, "k2"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return c_buf.load(m, n) * d_buf.load(m, k) + x->load(m, n, k); + return c_buf.load(m, n) * d_buf.load(m, k) + x.load(m, n, k); }); - Tensor* z = Compute( + Tensor z = Compute( "z", {{M, "m3"}, {N, "n3"}, {K, "k3"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return x->load(m, n, k) + y->load(m, n, k); + return x.load(m, n, k) + y.load(m, n, k); }); - LoopNest l(std::vector({z}), {x, y, z}); + LoopNest l({z}, {x, y, z}); for (const std::string& order : inline_order) { if (order == "x") { - l.computeInline(x->buf()); + l.computeInline(x.buf()); } else if (order == "y") { - l.computeInline(y->buf()); + l.computeInline(y.buf()); } else { throw std::runtime_error("Invalid order: " + order); } @@ -1218,7 +1199,7 @@ void InlineFunc01Helper(const std::vector& inline_order) { } if (inline_order.size() == 2) { - Tensor* z2 = Compute( + Tensor z2 = Compute( "z", {{M, "m3"}, {N, "n3"}, {K, "k3"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { @@ -1254,21 +1235,21 @@ TEST(LoopNest, ScheduleInlineRandom) { const int N = 5; 
const int K = 6; - Tensor* x = Compute( + Tensor x = Compute( "x", {{M, "m1"}, {N, "n1"}, {K, "k1"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { return Mod::make(Intrinsics::make(kRand, kInt), 5); }); - Tensor* y = Compute( + Tensor y = Compute( "y", {{M, "m2"}, {N, "n2"}, {K, "k2"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return x->load(m, n, k) + x->load(m, n, k); + return x.load(m, n, k) + x.load(m, n, k); }); - LoopNest l1(std::vector({y}), {x, y}); - l1.computeInline(x->buf()); + LoopNest l1({y}, {x, y}); + l1.computeInline(x.buf()); // would normally compare results but Rand isn't implemented in the // SimpleIREvaluator, even if we could seed it. @@ -1290,22 +1271,22 @@ TEST(LoopNest, ScheduleInlineRandomUnrelated) { const int N = 5; const int K = 6; - Tensor* x = Compute( + Tensor x = Compute( "x", {{M, "m1"}, {N, "n1"}, {K, "k1"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { return m * n * k; }); - Tensor* y = Compute( + Tensor y = Compute( "y", {{M, "m2"}, {N, "n2"}, {K, "k2"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return x->load(m, n, k) + Intrinsics::make(kRand, kInt) + + return x.load(m, n, k) + Intrinsics::make(kRand, kInt) + Intrinsics::make(kRand, kInt); }); - LoopNest l1(std::vector({y}), {x, y}); - l1.computeInline(x->buf()); + LoopNest l1({y}, {x, y}); + l1.computeInline(x.buf()); // would normally compare results but Rand isn't implemented in the // SimpleIREvaluator, even if we could seed it. @@ -1327,18 +1308,18 @@ TEST(LoopNest, ScheduleInlineRandomLowerDimensions) { const int N = 5; const int K = 6; - Tensor* x = Compute("x", {{M, "m1"}}, [&](const VarHandle& m) { + Tensor x = Compute("x", {{M, "m1"}}, [&](const VarHandle& m) { return Mod::make(Intrinsics::make(kRand, kInt), 5); }); - Tensor* y = Compute( + Tensor y = Compute( "y", {{M, "m2"}, {N, "n2"}, {K, "k2"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return x->load(m) + x->load(m); + return x.load(m) + x.load(m); }); - LoopNest l1(std::vector({y}), {x, y}); - l1.computeInline(x->buf()); + LoopNest l1({y}, {x, y}); + l1.computeInline(x.buf()); // would normally compare results but Rand isn't implemented in the // SimpleIREvaluator, even if we could seed it. 
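Illustrative sketch (not part of the patch): every test hunk above applies the same mechanical rewrite. 'Compute' now returns a 'Tensor' by value, so '->load()' and '->buf()' become '.load()' and '.buf()', and 'LoopNest' is constructed from brace-initialized tensor lists instead of an explicit 'std::vector'. Below is a minimal before/after sketch of the usage these tests exercise, written as it would appear inside a test body; the includes, namespaces, and the sizes M and N are assumed rather than taken from the patch.

// Old style (pointer Tensors), as removed above:
//   Tensor* x = Compute("x", {{M, "m"}, {N, "n"}}, ...);
//   Tensor* y = Compute("y", {{M, "m"}, {N, "n"}},
//       [&](const VarHandle& m, const VarHandle& n) { return x->load(m, n) + 1.0f; });
//   LoopNest l(std::vector<Tensor*>({y}), {x, y});
//   l.computeInline(x->buf());
//
// New style (value Tensors), as added above:
KernelScope kernel_scope;              // still present in the tests shown here
const int M = 4;
const int N = 5;
Placeholder a_buf("a", kFloat, {M, N});
Tensor x = Compute(
    "x", {{M, "m"}, {N, "n"}}, [&](const VarHandle& m, const VarHandle& n) {
      return a_buf.load(m, n);
    });
Tensor y = Compute(
    "y", {{M, "m"}, {N, "n"}}, [&](const VarHandle& m, const VarHandle& n) {
      return x.load(m, n) + 1.0f;      // member call on the value type
    });
LoopNest l({y}, {x, y});               // ({outputs}, {all tensors})
l.computeInline(x.buf());              // the Buf is reached via .buf()
l.prepareForCodegen();
StmtPtr s = IRSimplifier::simplify(l.root_stmt());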
@@ -1362,17 +1343,17 @@ TEST(LoopNest, ScheduleInlineIntrinsics) { Placeholder a_buf("a", kFloat, {M, N}); Placeholder b_buf("b", kFloat, {N, K}); - Tensor* x = Compute( + Tensor x = Compute( "x", {{M, "m1"}, {N, "n1"}, {K, "k1"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { return a_buf.load(m, n) * b_buf.load(n, k); }); - Tensor* y = Compute( + Tensor y = Compute( "y", {{M, "m2"}, {N, "n2"}, {K, "k2"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return Intrinsics::make(kSqrt, x->load(m, n, k)); + return Intrinsics::make(kSqrt, x.load(m, n, k)); }); PaddedBuffer a_v(M, N); @@ -1389,9 +1370,9 @@ TEST(LoopNest, ScheduleInlineIntrinsics) { } } - LoopNest l1(std::vector({y}), {x, y}); + LoopNest l1({y}, {x, y}); LoopNest l2(l1); - l2.computeInline(x->buf()); + l2.computeInline(x.buf()); l1.prepareForCodegen(); l2.prepareForCodegen(); @@ -1421,21 +1402,21 @@ TEST(LoopNest, ScheduleInlineRandWithIntrinsics) { const int N = 5; const int K = 6; - Tensor* x = Compute( + Tensor x = Compute( "x", {{M, "m1"}, {N, "n1"}, {K, "k1"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { return Intrinsics::make(kRand, kFloat); }); - Tensor* y = Compute( + Tensor y = Compute( "y", {{M, "m2"}, {N, "n2"}, {K, "k2"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return Intrinsics::make(kSqrt, x->load(m, n, k)); + return Intrinsics::make(kSqrt, x.load(m, n, k)); }); - LoopNest l1(std::vector({y}), {x, y}); - l1.computeInline(x->buf()); + LoopNest l1({y}, {x, y}); + l1.computeInline(x.buf()); StmtPtr stmt1 = IRSimplifier::simplify(l1.root_stmt()); @@ -1451,31 +1432,31 @@ TEST(LoopNest, ScheduleInlineRandWithIntrinsics) { // Split a Compute then inline it into another compute. TEST(LoopNest, ScheduleSplitAThenInline) { KernelScope kernel_scope; - Tensor* a = + Tensor a = Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; }); - Tensor* b = Compute("b", {{2, "j"}}, [&](const VarHandle& j) { - return a->load(j + ExprHandle(8)); + Tensor b = Compute("b", {{2, "j"}}, [&](const VarHandle& j) { + return a.load(j + ExprHandle(8)); }); - LoopNest l(std::vector({b}), {a, b}); - std::vector loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0); + LoopNest l({b}, {a, b}); + std::vector loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0); LoopNest::splitWithMask(loops[0], 4); - ASSERT_THROWS_WITH(l.computeInline(a->buf()), "compound indices"); + ASSERT_THROWS_WITH(l.computeInline(a.buf()), "compound indices"); } // Split a Compute then inline another Compute into it. TEST(LoopNest, ScheduleSplitBThenInline) { KernelScope kernel_scope; - Tensor* a = + Tensor a = Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; }); - Tensor* b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) { - return a->load(j + ExprHandle(8)); + Tensor b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) { + return a.load(j + ExprHandle(8)); }); - LoopNest l(std::vector({b}), {a, b}); - std::vector loops = l.getAllLoopNestsWritingToBuf(b->buf()).at(0); + LoopNest l({b}, {a, b}); + std::vector loops = l.getAllLoopNestsWritingToBuf(b.buf()).at(0); LoopNest::splitWithMask(loops[0], 3); - l.computeInline(a->buf()); + l.computeInline(a.buf()); l.prepareForCodegen(); StmtPtr s = IRSimplifier::simplify(l.root_stmt()); @@ -1491,32 +1472,32 @@ TEST(LoopNest, ScheduleSplitBThenInline) { // Split a Compute twice then inline it. 
TEST(LoopNest, ScheduleSplitTwiceThenInline) { KernelScope kernel_scope; - Tensor* a = + Tensor a = Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; }); - Tensor* b = Compute("b", {{2, "j"}}, [&](const VarHandle& j) { - return a->load(j + ExprHandle(8)); + Tensor b = Compute("b", {{2, "j"}}, [&](const VarHandle& j) { + return a.load(j + ExprHandle(8)); }); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr i_inner; - LoopNest l(std::vector({b}), {a, b}); - std::vector loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0); + LoopNest l({b}, {a, b}); + std::vector loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0); LoopNest::splitWithMask(loops[0], 4, &i_inner); LoopNest::splitWithMask(i_inner, 2); - ASSERT_THROWS_WITH(l.computeInline(a->buf()), "compound indices"); + ASSERT_THROWS_WITH(l.computeInline(a.buf()), "compound indices"); } // Inline a Compute, then split. TEST(LoopNest, ScheduleInlineThenSplit) { KernelScope kernel_scope; - Tensor* a = + Tensor a = Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; }); - Tensor* b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) { - return a->load(j + ExprHandle(8)); + Tensor b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) { + return a.load(j + ExprHandle(8)); }); - LoopNest l(std::vector({b}), {a, b}); - l.computeInline(a->buf()); + LoopNest l({b}, {a, b}); + l.computeInline(a.buf()); std::vector loops = NodeFinder::find(l.root_stmt()); LoopNest::splitWithMask(loops.back(), 3); @@ -1534,16 +1515,16 @@ TEST(LoopNest, ScheduleInlineThenSplit) { // Split a Compute, inline it, then split the result. TEST(LoopNest, ScheduleSplitInlineThenSplit) { KernelScope kernel_scope; - Tensor* a = + Tensor a = Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; }); - Tensor* b = Compute("b", {{16, "j"}}, [&](const VarHandle& j) { - return a->load(j + ExprHandle(8)); + Tensor b = Compute("b", {{16, "j"}}, [&](const VarHandle& j) { + return a.load(j + ExprHandle(8)); }); - LoopNest l(std::vector({b}), {a, b}); + LoopNest l({b}, {a, b}); auto loops = NodeFinder::find(l.root_stmt()); LoopNest::splitWithMask(loops.back(), 2); - l.computeInline(a->buf()); + l.computeInline(a.buf()); loops = NodeFinder::find(l.root_stmt()); LoopNest::splitWithMask(loops.front(), 2); @@ -1561,35 +1542,35 @@ TEST(LoopNest, ScheduleSplitInlineThenSplit) { // Oversplit a loop that is simplified out after inlining. TEST(LoopNest, ScheduleSplitInlineSimplify) { KernelScope kernel_scope; - Tensor* a = Compute("a", {{18, "i"}}, [&](const VarHandle& i) { + Tensor a = Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return ExprHandle(4) * i - ExprHandle(2) * i; }); - Tensor* b = Compute("b", {{2, "j"}}, [&](const VarHandle& j) { - return a->load(j) - ExprHandle(1); + Tensor b = Compute("b", {{2, "j"}}, [&](const VarHandle& j) { + return a.load(j) - ExprHandle(1); }); - LoopNest l(std::vector({b}), {a, b}); - std::vector loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0); + LoopNest l({b}, {a, b}); + std::vector loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0); LoopNest::splitWithMask(loops[0], 4); - ASSERT_THROWS_WITH(l.computeInline(a->buf()), "compound indices"); + ASSERT_THROWS_WITH(l.computeInline(a.buf()), "compound indices"); } // Inline a Compute with two consumers. 
TEST(LoopNest, ScheduleInlineThreeMixedOnce) { KernelScope kernel_scope; - Tensor* a = + Tensor a = Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; }); - Tensor* b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) { - return a->load(j + ExprHandle(8)); + Tensor b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) { + return a.load(j + ExprHandle(8)); }); - Tensor* c = Compute( + Tensor c = Compute( "c", {{4, "k"}, {3, "l"}}, [&](const VarHandle& k, const VarHandle& l) { - return a->load(k) * b->load(l); + return a.load(k) * b.load(l); }); - LoopNest l(std::vector({c}), {a, b, c}); - std::vector loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0); - l.computeInline(a->buf()); + LoopNest l({c}, {a, b, c}); + std::vector loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0); + l.computeInline(a.buf()); l.prepareForCodegen(); StmtPtr s = IRSimplifier::simplify(l.root_stmt()); @@ -1607,20 +1588,20 @@ TEST(LoopNest, ScheduleInlineThreeMixedOnce) { // Inline Compute A into B, then inline B into C. TEST(LoopNest, ScheduleInlineThreeMixedTwice) { KernelScope kernel_scope; - Tensor* a = + Tensor a = Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; }); - Tensor* b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) { - return a->load(j + ExprHandle(8)); + Tensor b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) { + return a.load(j + ExprHandle(8)); }); - Tensor* c = Compute( + Tensor c = Compute( "c", {{4, "k"}, {3, "l"}}, [&](const VarHandle& k, const VarHandle& l) { - return a->load(k) * b->load(l); + return a.load(k) * b.load(l); }); - LoopNest l(std::vector({c}), {a, b, c}); - std::vector loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0); - l.computeInline(a->buf()); - l.computeInline(b->buf()); + LoopNest l({c}, {a, b, c}); + std::vector loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0); + l.computeInline(a.buf()); + l.computeInline(b.buf()); l.prepareForCodegen(); StmtPtr s = IRSimplifier::simplify(l.root_stmt()); @@ -1638,19 +1619,19 @@ TEST(LoopNest, ScheduleInlineThreeMixedTwice) { // Inline a Compute that is both a producer and consumer. TEST(LoopNest, ScheduleInlineThreeMixedInner) { KernelScope kernel_scope; - Tensor* a = + Tensor a = Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; }); - Tensor* b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) { - return a->load(j + ExprHandle(8)); + Tensor b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) { + return a.load(j + ExprHandle(8)); }); - Tensor* c = Compute( + Tensor c = Compute( "c", {{4, "k"}, {3, "l"}}, [&](const VarHandle& k, const VarHandle& l) { - return a->load(k) * b->load(l); + return a.load(k) * b.load(l); }); - LoopNest l(std::vector({c}), {a, b, c}); - std::vector loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0); - l.computeInline(b->buf()); + LoopNest l({c}, {a, b, c}); + std::vector loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0); + l.computeInline(b.buf()); l.prepareForCodegen(); StmtPtr s = IRSimplifier::simplify(l.root_stmt()); @@ -1668,25 +1649,25 @@ TEST(LoopNest, ScheduleInlineThreeMixedInner) { // Split 3 Computes, then inline the first two into the last. 
TEST(LoopNest, ScheduleInlineThreeMixedSplit) { KernelScope kernel_scope; - Tensor* a = + Tensor a = Compute("a", {{18, "i"}}, [&](const VarHandle& i) { return i * i; }); - Tensor* b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) { - return a->load(j + ExprHandle(8)); + Tensor b = Compute("b", {{6, "j"}}, [&](const VarHandle& j) { + return a.load(j + ExprHandle(8)); }); - Tensor* c = Compute( + Tensor c = Compute( "c", {{4, "k"}, {3, "l"}}, [&](const VarHandle& k, const VarHandle& l) { - return a->load(k) * b->load(l); + return a.load(k) * b.load(l); }); - LoopNest l(std::vector({c}), {a, b, c}); - std::vector loops = l.getAllLoopNestsWritingToBuf(a->buf()).at(0); + LoopNest l({c}, {a, b, c}); + std::vector loops = l.getAllLoopNestsWritingToBuf(a.buf()).at(0); LoopNest::splitWithMask(loops[0], 4); - loops = l.getAllLoopNestsWritingToBuf(b->buf()).at(0); + loops = l.getAllLoopNestsWritingToBuf(b.buf()).at(0); LoopNest::splitWithMask(loops[0], 3); - loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0); + loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0); LoopNest::splitWithMask(loops[0], 2); - ASSERT_THROWS_WITH(l.computeInline(a->buf()), "compound indices"); + ASSERT_THROWS_WITH(l.computeInline(a.buf()), "compound indices"); } // Check that inlining works for output tensors too @@ -1696,21 +1677,21 @@ TEST(LoopNest, ScheduleInlineOutputTensors) { const int N = 5; const int K = 6; - Tensor* x = Compute( + Tensor x = Compute( "x", {{M, "m1"}, {N, "n1"}, {K, "k1"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { return m * n * k; }); - Tensor* y = Compute( + Tensor y = Compute( "y", {{M, "m2"}, {N, "n2"}, {K, "k2"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return x->load(m, n, k) + m; + return x.load(m, n, k) + m; }); LoopNest l1({x, y}); - l1.computeInline(x->buf()); + l1.computeInline(x.buf()); // would normally compare results but Rand isn't implemented in the // SimpleIREvaluator, even if we could seed it. 
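The inlining tests around this point keep working through either handle: 'computeInline' can be given the producer's buffer or the producer's loop body, and with the value-type 'Tensor' only the accessor spelling changes. A short illustrative sketch of that pattern follows; the names, sizes, and includes are assumptions for the example, not taken from the patch.

KernelScope kernel_scope;
const int kTotalSize = 8;
Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat));
Tensor e = Compute("e", {{kTotalSize, "i"}}, [&](const VarHandle& i) {
  return a_buf.load(i) + 1.0f;
});
Tensor f = Compute("f", {{kTotalSize, "i"}}, [&](const VarHandle& i) {
  return e.load(i) * 2.0f;             // consumer reads the producer through the value handle
});
LoopNest l({f}, {e, f});
l.computeInline(e.buf());              // inline by buffer (previously e->buf())
// The tests also use the overload taking the loop body:
//   l.computeInline(l.getLoopBodyFor(e));
l.prepareForCodegen();
StmtPtr s = IRSimplifier::simplify(l.root_stmt());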
@@ -1736,14 +1717,14 @@ TEST(LoopNest, ScheduleFuserStyle) { Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat)); - Tensor* b = Compute( + Tensor b = Compute( "f", {{kTotalSize, "i"}}, [&](const std::vector& axes) { return a_buf.load(axes[0]) + 11.0f; }); - Tensor* c = Compute( + Tensor c = Compute( "g", {{kTotalSize, "i"}}, [&](const std::vector& axes) { - return b->load(axes[0]) + 1.0f; + return b.load(axes[0]) + 1.0f; }); LoopNest l({b, c}); @@ -1772,17 +1753,17 @@ TEST(LoopNest, ScheduleFuserThreeArg) { Placeholder c(BufHandle("C", {ExprHandle(kTotalSize)}, kFloat)); Placeholder d(BufHandle("D", {ExprHandle(kTotalSize)}, kFloat)); - Tensor* e = Compute("e", {{kTotalSize, "i"}}, [&](const VarHandle& i) { + Tensor e = Compute("e", {{kTotalSize, "i"}}, [&](const VarHandle& i) { return a.load(i) + b.load(i); }); - Tensor* f = Compute("f", {{kTotalSize, "i"}}, [&](const VarHandle& i) { - return e->load(i) + c.load(i); + Tensor f = Compute("f", {{kTotalSize, "i"}}, [&](const VarHandle& i) { + return e.load(i) + c.load(i); }); - Tensor* g = Compute("g", {{kTotalSize, "i"}}, [&](const VarHandle& i) { - return f->load(i) + d.load(i); + Tensor g = Compute("g", {{kTotalSize, "i"}}, [&](const VarHandle& i) { + return f.load(i) + d.load(i); }); - LoopNest l(std::vector({g}), {e, f, g}); + LoopNest l({g}, {e, f, g}); l.computeInline(l.getLoopBodyFor(e)); l.computeInline(l.getLoopBodyFor(f)); l.prepareForCodegen(); @@ -1807,7 +1788,7 @@ TEST(LoopNest, ScheduleDynamicShape2D) { VarHandle n("n", kInt); Placeholder a(BufHandle("a", {m, n}, kFloat)); Placeholder b(BufHandle("b", {m, n}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "c", {{m, "m"}, {n, "n"}}, [&](const VarHandle& i, const VarHandle& j) { return a.load(i, j) + b.load(i, j); }); @@ -1842,12 +1823,12 @@ TEST(LoopNest, LoopNestComputeAt_1) { // and the temp should be used in B. 
KernelScope kernel_scope; VarHandle N("N", kInt); - Tensor* A = Compute( + Tensor A = Compute( "A", {{N, "i_a"}}, [&](const VarHandle& i_a) { return i_a * i_a; }); - Tensor* B = Compute( - "B", {{N, "i_b"}}, [&](const VarHandle& i_b) { return A->load(i_b); }); - LoopNest l(std::vector({B}), {A, B}); - std::vector loops = l.getAllLoopNestsWritingToBuf(B->buf()).at(0); + Tensor B = Compute( + "B", {{N, "i_b"}}, [&](const VarHandle& i_b) { return A.load(i_b); }); + LoopNest l({B}, {A, B}); + std::vector loops = l.getAllLoopNestsWritingToBuf(B.buf()).at(0); LoopNest::computeAt(l.getLoopBodyFor(A), loops[0]); l.prepareForCodegen(); StmtPtr s = l.root_stmt(); @@ -1891,16 +1872,16 @@ TEST(LoopNest, LoopNestComputeAt_2) { const int kW = 16, kH = 16; VarHandle W("W", kInt); VarHandle H("H", kInt); - Tensor* p = Compute( + Tensor p = Compute( "prod", {{H + 1, "py"}, {W + 1, "px"}}, [&](const VarHandle& py, const VarHandle& px) { return px * py; }); - Tensor* c = Compute( + Tensor c = Compute( "cons", {{H, "cy"}, {W, "cx"}}, [&](const VarHandle& y, const VarHandle& x) { - return p->load(y, x) + p->load(y + 1, x) + p->load(y, x + 1) + - p->load(y + 1, x + 1); + return p.load(y, x) + p.load(y + 1, x) + p.load(y, x + 1) + + p.load(y + 1, x + 1); }); std::vector c_ref(kW * kH, 0); @@ -1909,12 +1890,12 @@ TEST(LoopNest, LoopNestComputeAt_2) { c_ref[y * kW + x] = y * x + (y + 1) * x + y * (x + 1) + (y + 1) * (x + 1); } } - LoopNest orig_loopnest(std::vector({c}), {p, c}); + LoopNest orig_loopnest({c}, {p, c}); { // First let's try to compute P at axis cy (the outer loop) LoopNest l(orig_loopnest); - std::vector loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0); LoopNest::computeAt(l.getLoopBodyFor(p), loops[0]); l.prepareForCodegen(); StmtPtr s = l.root_stmt(); @@ -1940,7 +1921,7 @@ TEST(LoopNest, LoopNestComputeAt_2) { { // Now let's try to compute P at axis cx (the inner loop) LoopNest l(orig_loopnest); - std::vector loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0); LoopNest::computeAt(l.getLoopBodyFor(p), loops[1]); l.prepareForCodegen(); StmtPtr s = l.root_stmt(); @@ -1979,27 +1960,25 @@ TEST(LoopNest, LoopNestComputeAt_3) { const int kW = 16, kH = 16; VarHandle W("W", kInt); VarHandle H("H", kInt); - Tensor* A = Compute( + Tensor A = Compute( "A", {{H + 1, "ay"}, {W + 1, "ax"}}, [&](const VarHandle& ay, const VarHandle& ax) { return ax * ay; }); - Tensor* B = Compute( + Tensor B = Compute( "B", {{H + 1, "by"}, {W + 1, "bx"}}, - [&](const VarHandle& by, const VarHandle& bx) { - return A->load(by, bx); - }); - Tensor* C = Compute( + [&](const VarHandle& by, const VarHandle& bx) { return A.load(by, bx); }); + Tensor C = Compute( "C", {{H, "cy"}, {W, "cx"}}, [&](const VarHandle& cy, const VarHandle& cx) { - return B->load(cy, cx + 1); + return B.load(cy, cx + 1); }); - Tensor* D = Compute( + Tensor D = Compute( "D", {{H, "dy"}, {W, "dx"}}, [&](const VarHandle& dy, const VarHandle& dx) { - return A->load(dy + 1, dx) + C->load(dy, dx); + return A.load(dy + 1, dx) + C.load(dy, dx); }); std::vector c_ref(kW * kH, 0); @@ -2009,11 +1988,11 @@ TEST(LoopNest, LoopNestComputeAt_3) { } } - LoopNest orig_loopnest(std::vector({D}), {A, B, C, D}); + LoopNest orig_loopnest({D}, {A, B, C, D}); { // First let's try to compute A at axis dy (the outer loop) LoopNest l(orig_loopnest); - std::vector loops = l.getAllLoopNestsWritingToBuf(D->buf()).at(0); + std::vector loops = 
l.getAllLoopNestsWritingToBuf(D.buf()).at(0); LoopNest::computeAt(l.getLoopBodyFor(A), loops[0]); l.prepareForCodegen(); StmtPtr s = l.root_stmt(); @@ -2044,7 +2023,7 @@ TEST(LoopNest, LoopNestComputeAt_3) { { // Now let's try to compute A at axis dx (the inner loop) LoopNest l(orig_loopnest); - std::vector loops = l.getAllLoopNestsWritingToBuf(D->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(D.buf()).at(0); LoopNest::computeAt(l.getLoopBodyFor(A), loops[1]); l.prepareForCodegen(); StmtPtr s = l.root_stmt(); @@ -2083,15 +2062,15 @@ TEST(LoopNest, Reduce2dComputeAt) { VarHandle W("W", kInt); VarHandle H("H", kInt); - Tensor* p = + Tensor p = Compute("prod", {{H + 1, "py"}, {W + 1, "px"}}, [&](Axis py, Axis px) { return px * py; }); - Tensor* c = Reduce( + Tensor c = Reduce( "cons", {{H, "cy"}, {W, "cx"}}, Sum(), - [&](Axis y, Axis x, Axis r, Axis s) { return p->load(y + r, x + s); }, + [&](Axis y, Axis x, Axis r, Axis s) { return p.load(y + r, x + s); }, {{2, "r"}, {2, "s"}}); std::vector c_ref(kW * kH, 0); @@ -2100,7 +2079,7 @@ TEST(LoopNest, Reduce2dComputeAt) { c_ref[y * kW + x] = y * x + (y + 1) * x + y * (x + 1) + (y + 1) * (x + 1); } } - LoopNest orig_loopnest(std::vector({c}), {p, c}); + LoopNest orig_loopnest({c}, {p, c}); checkIR(orig_loopnest.root_stmt(), R"IR( # CHECK: for (int py = 0; py < H + 1; py++) { # CHECK: for (int px = 0; px < W + 1; px++) { @@ -2122,7 +2101,7 @@ TEST(LoopNest, Reduce2dComputeAt) { { // First let's try to compute P at axis cy (the outer loop) LoopNest l(orig_loopnest); - auto loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0); + auto loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0); LoopNest::computeAt(l.getLoopBodyFor(p), loops[0]); // FIXME: Calling simplify here breaks the IR: // MALFORMED INPUT: could not find base node in Load - temp[...] @@ -2159,7 +2138,7 @@ TEST(LoopNest, Reduce2dComputeAt) { { // Now let's try to compute P at axis cx (the inner loop) LoopNest l(orig_loopnest); - std::vector loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0); LoopNest::computeAt(l.getLoopBodyFor(p), loops[1]); l.simplify(); l.eliminateDeadStores(); @@ -2205,17 +2184,17 @@ TEST(LoopNest, DISABLED_Conv1d_NH) { int Pad = 1; Placeholder IP("input", kFloat, {H}); - Tensor* A = + Tensor A = Compute("A", {{N, "np"}, {H + 2 * Pad, "hp"}}, [&](Axis n, Axis h) { auto cond = CompareSelect::make(h, Pad, 1, 0, kLT); cond = CompareSelect::make(h, H + Pad, 1, cond, kGE); return ifThenElse(cond, 0.f, IP.load(n, h - Pad)); }); - Tensor* B = Reduce( + Tensor B = Reduce( "B", {{N, "n"}, {H, "h"}}, Sum(), - [&](Axis n, Axis h, Axis r) { return A->load(n, h + r); }, + [&](Axis n, Axis h, Axis r) { return A.load(n, h + r); }, {{R, "r"}}); LoopNest l({B}); checkIR(l.root_stmt(), R"IR( @@ -2233,7 +2212,7 @@ TEST(LoopNest, DISABLED_Conv1d_NH) { # CHECK: } # CHECK: } )IR"); - std::vector loops = l.getAllLoopNestsWritingToBuf(B->buf()).at(0); + std::vector loops = l.getAllLoopNestsWritingToBuf(B.buf()).at(0); LoopNest::computeAt(l.getLoopBodyFor(A), loops[0]); // FIXME: The current IR is totally broken. 
The body of the inlined loop is: @@ -2292,7 +2271,7 @@ class LoopOrderHelper : public IRVisitor { TEST(LoopNest, LoopNestReorderAxis1) { KernelScope kernel_scope; - Tensor* tensor = Compute( + Tensor tensor = Compute( "f", {{2, "x"}, {3, "y"}}, [](const VarHandle& x, const VarHandle& y) { return ExprHandle(1.0f) + cast(x) * x + cast(y) * y; }); @@ -2303,7 +2282,7 @@ TEST(LoopNest, LoopNestReorderAxis1) { SimpleIREvaluator cg(stmt1, {tensor}); cg.call({stmt1_output}); - auto loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + auto loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::reorderAxis(loops[0], loops[1]); StmtPtr stmt2 = Stmt::clone(l.root_stmt()); @@ -2324,7 +2303,7 @@ TEST(LoopNest, LoopNestReorderAxis1) { } // Reorder them back. - loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::reorderAxis(loops[0], loops[1]); StmtPtr stmt3 = l.root_stmt(); @@ -2341,7 +2320,7 @@ TEST(LoopNest, LoopNestReorderAxis1) { TEST(LoopNest, LoopNestReorderPartialAxes) { KernelScope kernel_scope; - Tensor* tensor = Compute( + Tensor tensor = Compute( "f", {{2, "x"}, {3, "y"}, {4, "z"}}, [](const VarHandle& x, const VarHandle& y, const VarHandle& z) { @@ -2358,7 +2337,7 @@ TEST(LoopNest, LoopNestReorderPartialAxes) { SimpleIREvaluator cg(stmt1, {tensor}); cg.call({stmt1_output}); - auto loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + auto loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::reorderAxis(loops[0], loops[1]); ASSERT_EQ(loopOrderHelper.getOrder(l.root_stmt()), "y,x,z,"); @@ -2372,7 +2351,7 @@ TEST(LoopNest, LoopNestReorderPartialAxes) { ASSERT_EQ(stmt1_output[i], stmt2_output[i]); } - loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::reorderAxis(loops[1], loops[2]); ASSERT_EQ(loopOrderHelper.getOrder(l.root_stmt()), "y,z,x,"); @@ -2389,7 +2368,7 @@ TEST(LoopNest, LoopNestReorderPartialAxes) { TEST(LoopNest, LoopNestReorderInternalAxis) { KernelScope kernel_scope; - Tensor* tensor = Compute( + Tensor tensor = Compute( "f", {{1, "w"}, {2, "x"}, {3, "y"}, {4, "z"}}, [](const VarHandle& w, @@ -2409,7 +2388,7 @@ TEST(LoopNest, LoopNestReorderInternalAxis) { SimpleIREvaluator cg(stmt1, {tensor}); cg.call({stmt1_output}); - auto loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + auto loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::reorderAxis(loops[2], loops[1]); ASSERT_EQ(loopOrderHelper.getOrder(l.root_stmt()), "w,y,x,z,"); @@ -2426,7 +2405,7 @@ TEST(LoopNest, LoopNestReorderInternalAxis) { TEST(LoopNest, LoopNestReorderEnclosingAxis) { KernelScope kernel_scope; - Tensor* tensor = Compute( + Tensor tensor = Compute( "f", {{1, "w"}, {2, "x"}, {3, "y"}, {4, "z"}}, [](const VarHandle& w, @@ -2445,7 +2424,7 @@ TEST(LoopNest, LoopNestReorderEnclosingAxis) { SimpleIREvaluator cg(stmt1, {tensor}); cg.call({stmt1_output}); - auto loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + auto loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::reorderAxis(loops[0], loops[3]); ASSERT_EQ(loopOrderHelper.getOrder(l.root_stmt()), "z,x,y,w,"); @@ -2462,14 +2441,14 @@ TEST(LoopNest, LoopNestReorderEnclosingAxis) { TEST(LoopNest, LoopNestReorderSameAxis) { KernelScope kernel_scope; - Tensor* tensor = Compute( + Tensor tensor = Compute( "f", {{2, "x"}, {3, "y"}}, [](const VarHandle& x, const VarHandle& y) { return ExprHandle(1.0f) + cast(x) * x + 
cast(y) * y; }); LoopNest l({tensor}); StmtPtr stmt1 = Stmt::clone(l.root_stmt()); - auto loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + auto loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::reorderAxis(loops[1], loops[1]); StmtPtr stmt2 = Stmt::clone(l.root_stmt()); @@ -2492,7 +2471,7 @@ TEST(LoopNest, LoopNestReorderExtraStatements) { KernelScope kernel_scope; - Tensor* tensor = Compute( + Tensor tensor = Compute( "f", {{2, "x"}, {3, "y"}, {4, "z"}}, [](const VarHandle& x, const VarHandle& y, const VarHandle& z) { @@ -2503,7 +2482,7 @@ TEST(LoopNest, LoopNestReorderExtraStatements) { Placeholder extra(BufHandle("res", {6, 3}, kFloat)); - auto loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + auto loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); VarHandle i = VarHandle(loops[0]->var()); @@ -2589,7 +2568,7 @@ TEST(LoopNest, LoopNestReorderExtraStatements) { * * */ - loops = l.getAllLoopNestsWritingToBuf(tensor->buf()).at(0); + loops = l.getAllLoopNestsWritingToBuf(tensor.buf()).at(0); LoopNest::reorderAxis(loops[0], loops[2]); StmtPtr stmt3 = Stmt::clone(l.root_stmt()); @@ -2628,7 +2607,7 @@ void LoopNestReorderTestHelper( int index2) { KernelScope kernel_scope; - Tensor* c = Compute( + Tensor c = Compute( "5d", {{2, "a"}, {3, "b"}, {2, "c"}, {3, "d"}, {2, "e"}}, [](const std::vector&) { return -1; }); @@ -2636,7 +2615,7 @@ void LoopNestReorderTestHelper( Placeholder extra(BufHandle("extra", {5}, kInt)); - auto loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0); + auto loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0); int j = 0; for (auto l : loops) { // Add an increment at each layer of the loop which counts the number of @@ -2677,7 +2656,7 @@ void LoopNestReorderTestHelper( ASSERT_EQ(extra1[i], expected_loops); } - loops = l.getAllLoopNestsWritingToBuf(c->buf()).at(0); + loops = l.getAllLoopNestsWritingToBuf(c.buf()).at(0); LoopNest::reorderAxis(loops[index1], loops[index2]); StmtPtr stmt2 = Stmt::clone(l.root_stmt()); @@ -2752,26 +2731,26 @@ TEST(LoopNest, LoopNestReorderInternalLoopNest) { Placeholder c_buf("c", kFloat, {M, N}); Placeholder d_buf("d", kFloat, {M, K}); - Tensor* x = Compute( + Tensor x = Compute( "x", {{M, "m1"}, {N, "n1"}, {K, "k1"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { return a_buf.load(m, n) * b_buf.load(n, k); }); - Tensor* y = Compute( + Tensor y = Compute( "y", {{M, "m2"}, {N, "n2"}, {K, "k2"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return c_buf.load(m, n) * d_buf.load(m, k) + x->load(m, n, k); + return c_buf.load(m, n) * d_buf.load(m, k) + x.load(m, n, k); }); - Tensor* z = Compute( + Tensor z = Compute( "z", {{M, "m3"}, {N, "n3"}, {K, "k3"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return x->load(m, n, k) + y->load(m, n, k); + return x.load(m, n, k) + y.load(m, n, k); }); - LoopNest l(std::vector({z}), {x, y, z}); + LoopNest l({z}, {x, y, z}); ForPtr a = nullptr; ForPtr b = nullptr; auto fors = NodeFinder::find(l.root_stmt()); @@ -2845,14 +2824,14 @@ TEST(LoopNest, LoopNestReorderInternalLoopNest) { TEST(LoopNest, OuterLoopVectorization) { KernelScope kernel_scope; - Tensor* tensor = Compute( + Tensor tensor = Compute( "f", {{8, "X"}, {8, "y"}}, [](const VarHandle& x, const VarHandle& y) { return ExprHandle(1.0f) + cast(x) * x + cast(y) * y; }); LoopNest l({tensor}); ASSERT_TRUE( - LoopNest::vectorize(l.getAllLoopNestsWritingToBuf(tensor->buf())[0][0])); + 
LoopNest::vectorize(l.getAllLoopNestsWritingToBuf(tensor.buf())[0][0])); StmtPtr root_stmt = l.root_stmt(); BlockPtr outer_block = to(root_stmt); @@ -2899,10 +2878,10 @@ namespace { std::string constantUpperBoundLoopIR(int upper_bound_val) { KernelScope kernel_scope; ExprHandle upper_bound(upper_bound_val); - Tensor* A = Compute( + Tensor A = Compute( "A", {{upper_bound, "x"}}, [&](const VarHandle& x) { return x * 2; }); LoopNest l({A}); - std::vector loops = l.getAllLoopNestsWritingToBuf(A->buf())[0]; + std::vector loops = l.getAllLoopNestsWritingToBuf(A.buf())[0]; StmtPtr unrolled = nullptr; LoopNest::unroll(loops[0], &unrolled); std::ostringstream oss; @@ -2927,12 +2906,12 @@ TEST(LoopNest, UnrollOuter) { KernelScope kernel_scope; ExprHandle outer_bound(3); ExprHandle inner_bound(4); - Tensor* A = Compute( + Tensor A = Compute( "A", {{outer_bound, "x"}, {inner_bound, "y"}}, [&](const VarHandle& x, const VarHandle& y) { return x + y; }); LoopNest l({A}); - std::vector loops = l.getAllLoopNestsWritingToBuf(A->buf())[0]; + std::vector loops = l.getAllLoopNestsWritingToBuf(A.buf())[0]; StmtPtr unrolled = nullptr; LoopNest::unroll(loops[0], &unrolled); checkIR(unrolled, R"IR( @@ -2951,12 +2930,12 @@ TEST(LoopNest, UnrollInner) { KernelScope kernel_scope; ExprHandle outer_bound(3); ExprHandle inner_bound(4); - Tensor* A = Compute( + Tensor A = Compute( "A", {{outer_bound, "x"}, {inner_bound, "y"}}, [&](const VarHandle& x, const VarHandle& y) { return x + y; }); LoopNest l({A}); - std::vector loops = l.getAllLoopNestsWritingToBuf(A->buf())[0]; + std::vector loops = l.getAllLoopNestsWritingToBuf(A.buf())[0]; StmtPtr unrolled = nullptr; LoopNest::unroll( static_to(loops[0]->body()->stmts().front()), &unrolled); @@ -3044,10 +3023,10 @@ TEST(LoopNest, UnrollEmpty) { TEST(LoopNest, NoUnroll) { KernelScope kernel_scope; VarHandle upper_bound("N", kInt); - Tensor* A = Compute( + Tensor A = Compute( "A", {{upper_bound, "x"}}, [&](const VarHandle& x) { return x * 2; }); LoopNest l({A}); - std::vector loops = l.getAllLoopNestsWritingToBuf(A->buf())[0]; + std::vector loops = l.getAllLoopNestsWritingToBuf(A.buf())[0]; StmtPtr unrolled = nullptr; ASSERT_THROWS_WITH( LoopNest::unroll(loops[0], &unrolled), "non-constant loop"); @@ -3326,7 +3305,7 @@ TEST(LoopNest, NormalizeAndSplitWithTail) { // Create a dummy tensor to construct LoopNest. 
ExprHandle n(100); Placeholder a(BufHandle("a", {n}, kFloat)); - Tensor* b = + Tensor b = Compute("b", {{n, "i"}}, [&](const VarHandle& i) { return a.load(i); }); LoopNest l({b}); @@ -3615,12 +3594,12 @@ TEST(LoopNest, FlattenReductionLoopNestFromTensor) { VarHandle m("m", kInt); VarHandle n("n", kInt); Placeholder b(BufHandle("b", {m, n}, kFloat)); - Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}}); + Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}}); LoopNest loop({c}); HashProvider hasher; auto hash_before = hasher.hash(loop.root_stmt()); - auto loops = loop.getAllLoopNestsWritingToBuf(c->buf())[1]; + auto loops = loop.getAllLoopNestsWritingToBuf(c.buf())[1]; ForPtr flattened = nullptr; ASSERT_FALSE(LoopNest::flatten(loops, &flattened)); ASSERT_EQ(flattened, nullptr); @@ -3673,14 +3652,14 @@ TEST(LoopNest, DetectInlineRankMismatch) { const int kTotalSize = 8; Placeholder a_buf(BufHandle("A", {ExprHandle(kTotalSize)}, kFloat)); - Tensor* a = Compute("a", {{kTotalSize, "i"}}, [&](const VarHandle& i) { + Tensor a = Compute("a", {{kTotalSize, "i"}}, [&](const VarHandle& i) { return a_buf.load(i); }); - Tensor* reshape = Compute( + Tensor reshape = Compute( "reshape", {{kTotalSize / 2, "i"}, {2, "j"}}, - [&](const VarHandle& i, const VarHandle& j) { return a->load(i, j); }); - LoopNest l(std::vector({reshape}), {a, reshape}); + [&](const VarHandle& i, const VarHandle& j) { return a.load(i, j); }); + LoopNest l({reshape}, {a, reshape}); ASSERT_THROWS_WITH( l.computeInline(l.getLoopBodyFor(a)), "Placeholder indexed access is inconsistent with its rank"); @@ -3689,22 +3668,22 @@ TEST(LoopNest, DetectInlineRankMismatch) { TEST(LoopNest, CacheReadsSimple) { KernelScope kernel_scope; - Tensor* A = Compute( + Tensor A = Compute( "A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) { return i * j; }); - Tensor* B = Compute( + Tensor B = Compute( "B", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i + 30, j + 3); + return A.load(i + 30, j + 3); }); - Tensor* C = Compute( + Tensor C = Compute( "C", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i + 10, j + 20) + A->load(i + 30, j + 40); + return A.load(i + 10, j + 20) + A.load(i + 30, j + 40); }); - LoopNest l(std::vector({B, C}), {A, B, C}); - StmtPtr j_loop = l.getAllLoopNestsWritingToBuf(B->buf())[0][1]; - LoopNest::cacheAccesses(A->buf(), "A_local", j_loop); + LoopNest l({B, C}, {A, B, C}); + StmtPtr j_loop = l.getAllLoopNestsWritingToBuf(B.buf())[0][1]; + LoopNest::cacheAccesses(A.buf(), "A_local", j_loop); l.prepareForCodegen(); StmtPtr result = IRSimplifier::simplify(l.root_stmt()); @@ -3757,22 +3736,22 @@ TEST(LoopNest, CacheReadsSimple) { TEST(LoopNest, CacheReadsOuter) { KernelScope kernel_scope; - Tensor* A = Compute( + Tensor A = Compute( "A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) { return i * j; }); - Tensor* B = Compute( + Tensor B = Compute( "B", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i + 30, j + 40) + A->load(i + 31, j + 41); + return A.load(i + 30, j + 40) + A.load(i + 31, j + 41); }); - Tensor* C = Compute( + Tensor C = Compute( "C", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i + 10, j + 20) + A->load(i + 30, j + 40); + return A.load(i + 10, j + 20) + A.load(i + 30, j + 40); }); - LoopNest l(std::vector({B, C}), {A, B, C}); - StmtPtr i_loop = l.getAllLoopNestsWritingToBuf(B->buf())[0][0]; - 
LoopNest::cacheAccesses(A->buf(), "A_local", i_loop); + LoopNest l({B, C}, {A, B, C}); + StmtPtr i_loop = l.getAllLoopNestsWritingToBuf(B.buf())[0][0]; + LoopNest::cacheAccesses(A.buf(), "A_local", i_loop); l.prepareForCodegen(); StmtPtr result = IRSimplifier::simplify(l.root_stmt()); @@ -3805,22 +3784,22 @@ TEST(LoopNest, CacheReadsOuter) { TEST(LoopNest, CacheReadsInternal) { KernelScope kernel_scope; - Tensor* A = Compute( + Tensor A = Compute( "A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) { return i * j; }); - Tensor* B = Compute( + Tensor B = Compute( "B", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i + 30, j + 40) + A->load(i + 31, j + 41); + return A.load(i + 30, j + 40) + A.load(i + 31, j + 41); }); - Tensor* C = Compute( + Tensor C = Compute( "C", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i + 10, j + 20) + A->load(i + 30, j + 40); + return A.load(i + 10, j + 20) + A.load(i + 30, j + 40); }); - LoopNest l(std::vector({B, C}), {A, B, C}); - StmtPtr j_loop = l.getAllLoopNestsWritingToBuf(B->buf())[0][1]; - LoopNest::cacheAccesses(A->buf(), "A_local", j_loop); + LoopNest l({B, C}, {A, B, C}); + StmtPtr j_loop = l.getAllLoopNestsWritingToBuf(B.buf())[0][1]; + LoopNest::cacheAccesses(A.buf(), "A_local", j_loop); l.prepareForCodegen(); StmtPtr result = IRSimplifier::simplify(l.root_stmt()); @@ -3852,23 +3831,23 @@ TEST(LoopNest, CacheReadsInternal) { TEST(LoopNest, CacheReadsInner) { KernelScope kernel_scope; - Tensor* A = Compute( + Tensor A = Compute( "A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) { return i * j; }); // note im changing the offset of the first arg of the first call to A. - Tensor* B = Compute( + Tensor B = Compute( "B", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i + 34, j + 40) + A->load(i + 30, j + 41); + return A.load(i + 34, j + 40) + A.load(i + 30, j + 41); }); - Tensor* C = Compute( + Tensor C = Compute( "C", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i + 10, j + 20) + A->load(i + 30, j + 40); + return A.load(i + 10, j + 20) + A.load(i + 30, j + 40); }); - LoopNest l(std::vector({B, C}), {A, B, C}); + LoopNest l({B, C}, {A, B, C}); StmtPtr body = l.getLoopBodyFor(B); - LoopNest::cacheAccesses(A->buf(), "A_local", body); + LoopNest::cacheAccesses(A.buf(), "A_local", body); l.prepareForCodegen(); StmtPtr result = IRSimplifier::simplify(l.root_stmt()); @@ -3900,22 +3879,22 @@ TEST(LoopNest, CacheReadsInner) { TEST(LoopNest, CacheWritesSimple) { KernelScope kernel_scope; - Tensor* A = Compute( + Tensor A = Compute( "A", {{64, "i"}, {64, "j"}}, [](const VarHandle& i, const VarHandle& j) { return i * j; }); - Tensor* B = Compute( + Tensor B = Compute( "B", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i + 30, j + 40) + A->load(i + 31, j + 41); + return A.load(i + 30, j + 40) + A.load(i + 31, j + 41); }); - Tensor* C = Compute( + Tensor C = Compute( "C", {{20, "i"}, {10, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i + 10, j + 20) + A->load(i + 30, j + 40); + return A.load(i + 10, j + 20) + A.load(i + 30, j + 40); }); - LoopNest l(std::vector({B, C}), {A, B, C}); - StmtPtr a_loop = l.getAllLoopNestsWritingToBuf(A->buf())[0][1]; - LoopNest::cacheAccesses(A->buf(), "A_local", a_loop); + LoopNest l({B, C}, {A, B, C}); + StmtPtr a_loop = l.getAllLoopNestsWritingToBuf(A.buf())[0][1]; + 
LoopNest::cacheAccesses(A.buf(), "A_local", a_loop); l.prepareForCodegen(); StmtPtr result = IRSimplifier::simplify(l.root_stmt()); @@ -4054,7 +4033,7 @@ TEST(LoopNest, CompoundTensorSimple) { auto outer_for2 = For::make(x, 0, 10, inner_for2); BlockPtr body = Block::make({outer_for1, outer_for2}); - Tensor* A = new Tensor(a_buf.node(), body); + Tensor A = Tensor(a_buf.node(), body); LoopNest l({A}); l.prepareForCodegen(); @@ -4080,22 +4059,22 @@ TEST(LoopNest, InlineConstantIndex) { KernelScope kernel_scope; const int N = 10; Placeholder x_buf("a", kFloat, {1, N, 1}); - Tensor* y = Compute( + Tensor y = Compute( "f", {{1, "m"}, {N, "n"}, {1, "o"}}, [&](const ExprHandle& m, const ExprHandle& n, const ExprHandle& o) { return x_buf.load(m, n, o); }); - Tensor* z = Compute( + Tensor z = Compute( "f", {{1, "m"}, {N, "n"}, {1, "o"}}, [&](const ExprHandle& m, const ExprHandle& n, const ExprHandle& o) { - return y->load(m, n, o); + return y.load(m, n, o); }); - LoopNest l(std::vector({z}), {y, z}); + LoopNest l({z}, {y, z}); l.simplify(); - ASSERT_TRUE(l.computeInline(y->buf())); + ASSERT_TRUE(l.computeInline(y.buf())); } TEST(LoopNest, CompoundTensorUsed) { @@ -4115,14 +4094,14 @@ TEST(LoopNest, CompoundTensorUsed) { auto outer_for2 = For::make(x, 0, 10, inner_for2); BlockPtr body = Block::make({outer_for1, outer_for2}); - Tensor* A = new Tensor(a_buf.node(), body); - Tensor* B = Compute( + Tensor A = Tensor(a_buf.node(), body); + Tensor B = Compute( "B", {{10, "i"}, {3, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return A->load(i, j + 1) + A->load(i, j + 2); + return A.load(i, j + 1) + A.load(i, j + 2); }); - LoopNest l(std::vector({B}), {A, B}); - ASSERT_FALSE(l.computeInline(A->buf())); + LoopNest l({B}, {A, B}); + ASSERT_FALSE(l.computeInline(A.buf())); l.prepareForCodegen(); std::vector a_data(50, 0); @@ -4707,12 +4686,10 @@ TEST(LoopNest, OptimizeConditionalsNotNormalized) { ASSERT_EQ(hash_before, hash_after); } -static std::pair, Tensor*> colReduce( - int M, - int N) { +static std::pair, Tensor> colReduce(int M, int N) { auto a = std::make_unique("a", kFloat, std::vector{M, N}); - Tensor* t = Reduce( + Tensor t = Reduce( "b", {{N, "n"}}, Sum(), @@ -4721,10 +4698,10 @@ static std::pair, Tensor*> colReduce( return {std::move(a), t}; } -static StmtPtr splitTailReorder(Tensor* b) { +static StmtPtr splitTailReorder(Tensor b) { constexpr int kVectorWidth = 8; LoopNest nest({b}); - auto loops = nest.getAllLoopNestsWritingToBuf(b->buf())[0]; + auto loops = nest.getAllLoopNestsWritingToBuf(b.buf())[0]; nest.splitWithTail(loops[0], kVectorWidth); // Now the loopnests will look like: // @@ -4745,24 +4722,24 @@ static StmtPtr splitTailReorder(Tensor* b) { // Write #2: "b[n_outer * 8 + n_inner] = ReduceOp(...)" // Loopnest #2: {n_outer, n_inner, m}; // We will have to reorder n_inner and m. 
- auto loopnests = nest.getAllLoopNestsWritingToBuf(b->buf()); + auto loopnests = nest.getAllLoopNestsWritingToBuf(b.buf()); LoopNest::reorderAxis(loopnests[1][1], loopnests[1][2]); nest.prepareForCodegen(); return nest.root_stmt(); } -static StmtPtr splitMaskReorder(Tensor* b) { +static StmtPtr splitMaskReorder(Tensor b) { constexpr int kVectorWidth = 8; LoopNest nest({b}); - auto loops = nest.getAllLoopNestsWritingToBuf(b->buf())[1]; + auto loops = nest.getAllLoopNestsWritingToBuf(b.buf())[1]; nest.splitWithMask(loops[0], kVectorWidth); - loops = nest.getAllLoopNestsWritingToBuf(b->buf())[1]; + loops = nest.getAllLoopNestsWritingToBuf(b.buf())[1]; LoopNest::reorderAxis(loops[1], loops[2]); nest.prepareForCodegen(); return nest.root_stmt(); } -static void checkColReduce(StmtPtr s, Placeholder& p, Tensor* t) { +static void checkColReduce(StmtPtr s, Placeholder& p, Tensor t) { int M = immediateAs(p.dim(0)); int N = immediateAs(p.dim(1)); PaddedBuffer a(M, N); @@ -4893,14 +4870,14 @@ TEST(LoopNest, VectorizeUse) { KernelScope kernel_scope; constexpr int N = 8; Placeholder a("a", kFloat, {N}); - Tensor* b = Compute( + Tensor b = Compute( "b", {{N, "n"}}, [&](const VarHandle& n) { return a.load(n) + 1.0f; }); - Tensor* c = Compute( - "c", {{N, "n"}}, [&](const VarHandle& n) { return b->load(n) + 2.0f; }); - LoopNest nest(std::vector({c}), {b, c}); - auto loops = nest.getAllLoopNestsWritingToBuf(b->buf())[0]; + Tensor c = Compute( + "c", {{N, "n"}}, [&](const VarHandle& n) { return b.load(n) + 2.0f; }); + LoopNest nest({c}, {b, c}); + auto loops = nest.getAllLoopNestsWritingToBuf(b.buf())[0]; ASSERT_TRUE(LoopNest::vectorize(loops[0])); - loops = nest.getAllLoopNestsWritingToBuf(c->buf())[0]; + loops = nest.getAllLoopNestsWritingToBuf(c.buf())[0]; ASSERT_TRUE(LoopNest::vectorize(loops[0])); nest.prepareForCodegen(); // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) @@ -4939,7 +4916,7 @@ TEST(LoopNest, Int64Compute) { constexpr int64_t N = 12; Placeholder a("a", kLong, {N}); - Tensor* b = Compute("b", {{N, "n"}}, [&](const VarHandle& n) { + Tensor b = Compute("b", {{N, "n"}}, [&](const VarHandle& n) { return a.load(n) + LongImm::make(1l); }); LoopNest nest({b}); diff --git a/test/cpp/tensorexpr/test_memdependency.cpp b/test/cpp/tensorexpr/test_memdependency.cpp index 9503f9d..db37b66 100644 --- a/test/cpp/tensorexpr/test_memdependency.cpp +++ b/test/cpp/tensorexpr/test_memdependency.cpp @@ -2726,28 +2726,28 @@ TEST(MemDependency, MemDependencyCheckerComputeAPI) { // Can determine if 2 loops created by Compute are dependent. Placeholder a_buf("a", kFloat, {4, 5}); Placeholder b_buf("b", kFloat, {5, 6}); - Tensor* c = Compute( + Tensor c = Compute( "broadcast_add", {{4, "m"}, {5, "n"}, {6, "k"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { return a_buf.load(m, n) + b_buf.load(n, k); }); - Tensor* d = Compute( + Tensor d = Compute( "d", {{4, "m"}, {5, "n"}, {6, "k"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return c->load(m, n, k) + 1; + return c.load(m, n, k) + 1; }); - LoopNest l(std::vector({d}), {c, d}); + LoopNest l({d}, {c, d}); - MemDependencyChecker analyzer({a_buf.data(), b_buf.data()}, {d->buf()}); + MemDependencyChecker analyzer({a_buf.data(), b_buf.data()}, {d.buf()}); l.root_stmt()->accept(&analyzer); // Sanity test: Output depends on input. 
- ASSERT_TRUE(analyzer.dependsIndirectly(d->buf(), a_buf.data())); - ASSERT_TRUE(analyzer.dependsIndirectly(d->buf(), b_buf.data())); + ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a_buf.data())); + ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b_buf.data())); // Second loop depends on first loop. auto c_loop = l.getLoopStmtsFor(c)[0]; @@ -2773,32 +2773,32 @@ TEST(MemDependency, MemDependencyCheckerComputeInline) { Placeholder a_buf("a", kFloat, {4, 5}); Placeholder b_buf("b", kFloat, {5, 6}); - Tensor* c = Compute( + Tensor c = Compute( "broadcast_add", {{4, "m"}, {5, "n"}, {6, "k"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { return a_buf.load(m, n) + b_buf.load(n, k); }); - Tensor* d = Compute( + Tensor d = Compute( "d", {{4, "m"}, {5, "n"}, {6, "k"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { - return c->load(m, n, k) + 1; + return c.load(m, n, k) + 1; }); - LoopNest l(std::vector({d}), {c, d}); - l.computeInline(c->buf()); + LoopNest l({d}, {c, d}); + l.computeInline(c.buf()); - MemDependencyChecker analyzer({a_buf.data(), b_buf.data()}, {d->buf()}); + MemDependencyChecker analyzer({a_buf.data(), b_buf.data()}, {d.buf()}); l.root_stmt()->accept(&analyzer); // Sanity test: Output depends on input. - ASSERT_TRUE(analyzer.dependsIndirectly(d->buf(), a_buf.data())); - ASSERT_TRUE(analyzer.dependsIndirectly(d->buf(), b_buf.data())); + ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a_buf.data())); + ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b_buf.data())); // broadcast_add tensor should not appear in trace at all. for (auto& wi : analyzer.getHistory()) { - ASSERT_NE(wi->var(), c->buf()->base_handle()); + ASSERT_NE(wi->var(), c.buf()->base_handle()); } } @@ -2810,7 +2810,7 @@ TEST(MemDependency, MemDependencyCheckerComputeSplit) { Placeholder a_buf("a", kFloat, {4, 5}); Placeholder b_buf("b", kFloat, {5, 6}); - Tensor* c = Compute( + Tensor c = Compute( "broadcast_add", {{4, "m"}, {5, "n"}, {6, "k"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { @@ -2819,13 +2819,12 @@ TEST(MemDependency, MemDependencyCheckerComputeSplit) { LoopNest l({c}); - MemDependencyChecker analyzer_before( - {a_buf.data(), b_buf.data()}, {c->buf()}); + MemDependencyChecker analyzer_before({a_buf.data(), b_buf.data()}, {c.buf()}); l.root_stmt()->accept(&analyzer_before); l.splitWithTail(l.getLoopStmtsFor(c)[0], 2); - MemDependencyChecker analyzer_after({a_buf.data(), b_buf.data()}, {c->buf()}); + MemDependencyChecker analyzer_after({a_buf.data(), b_buf.data()}, {c.buf()}); StmtPtr stmt = IRSimplifier::simplify(l.root_stmt()); stmt->accept(&analyzer_after); @@ -2859,7 +2858,7 @@ TEST(MemDependency, MemDependencyCheckerComputeReorder) { Placeholder a_buf("a", kFloat, {4, 5}); Placeholder b_buf("b", kFloat, {5, 6}); - Tensor* c = Compute( + Tensor c = Compute( "broadcast_add", {{4, "m"}, {5, "n"}, {6, "k"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { @@ -2868,14 +2867,13 @@ TEST(MemDependency, MemDependencyCheckerComputeReorder) { LoopNest l({c}); - MemDependencyChecker analyzer_before( - {a_buf.data(), b_buf.data()}, {c->buf()}); + MemDependencyChecker analyzer_before({a_buf.data(), b_buf.data()}, {c.buf()}); l.root_stmt()->accept(&analyzer_before); auto loops = l.getLoopStmtsFor(c); l.reorderAxis(loops[0], loops[1]); - MemDependencyChecker analyzer_after({a_buf.data(), b_buf.data()}, {c->buf()}); + MemDependencyChecker analyzer_after({a_buf.data(), b_buf.data()}, {c.buf()}); StmtPtr stmt = 
IRSimplifier::simplify(l.root_stmt()); stmt->accept(&analyzer_after); @@ -2928,22 +2926,22 @@ TEST(MemDependency, MemDependencyCheckerComputeReduce) { Placeholder a(BufHandle("a", {2, 3, 6}, kFloat)); Placeholder b(BufHandle("b", {2, 3, 6}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "scale", {{2, "l2"}, {3, "n1"}, {6, "m1"}}, [&](const VarHandle& l, const VarHandle& n, const VarHandle& m) { return b.load(l, n, m) * a.load(l, n, m); }); - Tensor* d = Reduce("sum", {{2, "l1"}}, Sum(), c, {{3, "n1"}, {6, "m1"}}); - LoopNest l(std::vector({d}), {c, d}); + Tensor d = Reduce("sum", {{2, "l1"}}, Sum(), c, {{3, "n1"}, {6, "m1"}}); + LoopNest l({d}, {c, d}); - MemDependencyChecker analyzer({a.data(), b.data()}, {d->buf()}); + MemDependencyChecker analyzer({a.data(), b.data()}, {d.buf()}); l.root_stmt()->accept(&analyzer); // Sanity test: Output depends on input. - ASSERT_TRUE(analyzer.dependsIndirectly(d->buf(), a.data())); - ASSERT_TRUE(analyzer.dependsIndirectly(d->buf(), b.data())); + ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), a.data())); + ASSERT_TRUE(analyzer.dependsIndirectly(d.buf(), b.data())); // Second loop depends on first loop. auto c_loop = l.getLoopStmtsFor(c)[0]; @@ -2965,7 +2963,7 @@ TEST(MemDependency, MemDependencyCheckerComputeGEMM) { Placeholder AP(BufHandle("A", {M, K}, kFloat)); Placeholder BP(BufHandle("B", {K, N}, kFloat)); - Tensor* CT = Reduce( + Tensor CT = Reduce( "gemm", {{M, "M"}, {N, "N"}}, Sum(), @@ -3011,7 +3009,7 @@ TEST(MemDependency, MemDependencyCheckerComputeGEMM) { } { auto const& loops = loop.getLoopStmtsFor(CT); - loop.cacheAccesses(CT->buf(), "C_regs", loops[2]); + loop.cacheAccesses(CT.buf(), "C_regs", loops[2]); } MemDependencyChecker analyzer_unlowered( @@ -3026,12 +3024,12 @@ TEST(MemDependency, MemDependencyCheckerComputeGEMM) { stmt->accept(&analyzer_unlowered); // Outputs depend on inputs. - ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT->buf(), AP.data())); - ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT->buf(), BP.data())); + ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT.buf(), AP.data())); + ASSERT_TRUE(analyzer_unlowered.dependsIndirectly(CT.buf(), BP.data())); // The last write to gemm should cover the total bound of the output. std::shared_ptr outputAccess = - analyzer_unlowered.output(CT->buf()); + analyzer_unlowered.output(CT.buf()); // A single dependency. 
ASSERT_EQ(outputAccess->dependencies().size(), 1); diff --git a/test/cpp/tensorexpr/test_ops.cpp b/test/cpp/tensorexpr/test_ops.cpp index 674dbd9..122a498 100644 --- a/test/cpp/tensorexpr/test_ops.cpp +++ b/test/cpp/tensorexpr/test_ops.cpp @@ -6,7 +6,7 @@ using namespace torch::jit::tensorexpr; -using Tensors = std::vector; +using Tensors = std::vector; using Args = std::vector; std::unique_ptr compile( const Args& inputs, @@ -28,7 +28,7 @@ TEST(Ops, Sum) { constexpr int N = 16; Placeholder a("a", kFloat, {M, N}); - Tensor* b = computeSum({a.handle(), dims, false}, c10::kFloat); + Tensor b = computeSum({a.handle(), dims, false}, c10::kFloat); auto cg = compile({a}, {b}); auto at = at::arange(M * N, at::kFloat).view({M, N}); diff --git a/test/cpp/tensorexpr/test_reductions.cpp b/test/cpp/tensorexpr/test_reductions.cpp index 449edac..6620ef2 100644 --- a/test/cpp/tensorexpr/test_reductions.cpp +++ b/test/cpp/tensorexpr/test_reductions.cpp @@ -35,7 +35,7 @@ TEST(Reductions, ReduceSum0D_1) { std::vector out(M, -1.f); - Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {}); + Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {}); LoopNest loop({c}); loop.prepareForCodegen(); StmtPtr s = loop.root_stmt(); @@ -59,7 +59,7 @@ TEST(Reductions, ReduceSum0D_2) { std::vector out(1, -1.f); - Tensor* c = Reduce("sum", {}, Sum(), b, {}); + Tensor c = Reduce("sum", {}, Sum(), b, {}); LoopNest loop({c}); loop.prepareForCodegen(); StmtPtr s = loop.root_stmt(); @@ -83,7 +83,7 @@ TEST(Reductions, ReduceSum1D) { std::vector out(1, -1.f); - Tensor* c = Reduce("sum", {}, Sum(), b, {{10, "m"}}); + Tensor c = Reduce("sum", {}, Sum(), b, {{10, "m"}}); LoopNest loop({c}); loop.prepareForCodegen(); StmtPtr s = loop.root_stmt(); @@ -114,7 +114,7 @@ TEST(Reductions, ReduceSum2D) { std::vector out(M, -1.f); - Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}}); + Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}}); LoopNest loop({c}); loop.prepareForCodegen(); StmtPtr s = loop.root_stmt(); @@ -145,7 +145,7 @@ TEST(Reductions, ReduceSum3D) { Placeholder b(BufHandle("b", {2, 3, m}, kFloat)); - Tensor* c = Reduce("sum", {{2, "l"}, {3, "n"}}, Sum(), b, {{m, "m"}}); + Tensor c = Reduce("sum", {{2, "l"}, {3, "n"}}, Sum(), b, {{m, "m"}}); LoopNest loop({c}); loop.prepareForCodegen(); StmtPtr s = loop.root_stmt(); @@ -175,7 +175,7 @@ TEST(Reductions, ReduceSum3D) { ASSERT_EQ(cData[i], expected); } - Tensor* d = Reduce("sum2", {{2, "l"}}, Sum(), b, {{3, "n"}, {m, "m"}}); + Tensor d = Reduce("sum2", {{2, "l"}}, Sum(), b, {{3, "n"}, {m, "m"}}); LoopNest loop2({d}); loop2.prepareForCodegen(); StmtPtr s2 = loop2.root_stmt(); @@ -192,8 +192,8 @@ TEST(Reductions, ReduceSum3D) { } // This is the same as just reducing the original result across that axis. 
- Placeholder c_buf(BufHandle(c->buf())); - Tensor* e = Reduce("sum3", {{2, "l"}}, Sum(), c_buf, {{3, "m"}}); + Placeholder c_buf(BufHandle(c.buf())); + Tensor e = Reduce("sum3", {{2, "l"}}, Sum(), c_buf, {{3, "m"}}); LoopNest loop3({e}); loop3.prepareForCodegen(); StmtPtr s3 = loop3.root_stmt(); @@ -219,7 +219,7 @@ TEST(Reductions, ReduceSum10D) { std::vector in(InputSize, 1.f); std::vector out(OutputSize, -1.f); - Tensor* c = Reduce( + Tensor c = Reduce( "sum", {{2, "a"}, {3, "b"}, {2, "c"}, {3, "d"}, {2, "e"}}, Sum(), @@ -261,7 +261,7 @@ TEST(Reductions, ReduceProduct) { Reducer product( ExprHandle(1.f), [](ExprHandle a, ExprHandle b) { return a * b; }); - Tensor* c = Reduce("product", {{M, "m"}}, product, b, {{N, "n"}}); + Tensor c = Reduce("product", {{M, "m"}}, product, b, {{N, "n"}}); LoopNest loop({c}); loop.prepareForCodegen(); StmtPtr s = loop.root_stmt(); @@ -294,7 +294,7 @@ TEST(Reductions, ReduceMax) { in[j] = j; } - Tensor* dm1 = Reduce("max", {}, Maximum(kFloat), in_, {{10, "m"}}); + Tensor dm1 = Reduce("max", {}, Maximum(kFloat), in_, {{10, "m"}}); LoopNest loop({dm1}); loop.prepareForCodegen(); @@ -309,7 +309,7 @@ TEST(Reductions, ReduceMax) { Placeholder in2_(BufHandle("b", {2, 5}, kFloat)); std::vector out2(2, -1.f); - Tensor* m2d = Reduce("max", {{2, "n"}}, Maximum(kFloat), in2_, {{5, "m"}}); + Tensor m2d = Reduce("max", {{2, "n"}}, Maximum(kFloat), in2_, {{5, "m"}}); LoopNest loop2({m2d}); loop2.prepareForCodegen(); @@ -336,7 +336,7 @@ TEST(Reductions, ReduceMinCustomInitializer) { in[j] = 10 + j; } - Tensor* min = Reduce( + Tensor min = Reduce( "min", {}, Minimum(ExprHandle(minInit)), @@ -372,7 +372,7 @@ TEST(Reductions, ReduceAnyAll) { return CompareSelect::make(a, 1, 1, b, kEQ); }); - Tensor* any = Reduce( + Tensor any = Reduce( "anyEqual", {{4, "i"}}, anyEqSV, @@ -415,7 +415,7 @@ TEST(Reductions, ReduceAnyAll) { return CompareSelect::make(a, 0, 0, b, kEQ); }); - Tensor* allGreaterThan = Reduce( + Tensor allGreaterThan = Reduce( "allGreaterThan", {{4, "i"}}, allGTSV, @@ -465,7 +465,7 @@ TEST(Reductions, ReduceMatmul2D) { } } - Tensor* mm = Reduce( + Tensor mm = Reduce( "mm", {{3, "m"}, {3, "n"}}, Sum(), @@ -501,10 +501,10 @@ TEST(Reductions, ReduceRfactorLike) { std::vector in_rf_(10, -2.f); std::vector out(1, -1.f); - Tensor* l1 = Reduce("l1", {{10, "i"}}, Sum(), in, {{10, "j"}}); - Placeholder in_rf(BufHandle(l1->buf())); + Tensor l1 = Reduce("l1", {{10, "i"}}, Sum(), in, {{10, "j"}}); + Placeholder in_rf(BufHandle(l1.buf())); - Tensor* l2 = Reduce("l2", {}, Sum(), in_rf, {{10, "i"}}); + Tensor l2 = Reduce("l2", {}, Sum(), in_rf, {{10, "i"}}); LoopNest loop({l1, l2}); loop.prepareForCodegen(); @@ -526,14 +526,14 @@ TEST(Reductions, ReduceAsProducer) { Placeholder a(BufHandle("a", {2, 3}, kFloat)); Placeholder b(BufHandle("b", {2, 3, m}, kFloat)); - Tensor* c = Reduce("sum", {{2, "l1"}, {3, "n1"}}, Sum(), b, {{m, "m1"}}); - Tensor* d = Compute( + Tensor c = Reduce("sum", {{2, "l1"}, {3, "n1"}}, Sum(), b, {{m, "m1"}}); + Tensor d = Compute( "scale", {{2, "l2"}, {3, "n1"}}, [&](const VarHandle& l, const VarHandle& n) { - return c->load(l, n) * a.load(l, n); + return c.load(l, n) * a.load(l, n); }); - LoopNest loop(std::vector({d}), {c, d}); + LoopNest loop({d}, {c, d}); loop.prepareForCodegen(); StmtPtr s = loop.root_stmt(); s = IRSimplifier::simplify(s); @@ -571,14 +571,14 @@ TEST(Reductions, ReduceAsConsumer) { Placeholder a(BufHandle("a", {2, 3, m}, kFloat)); Placeholder b(BufHandle("b", {2, 3, m}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "scale", 
{{2, "l2"}, {3, "n1"}, {m, "m1"}}, [&](const VarHandle& l, const VarHandle& n, const VarHandle& m) { return b.load(l, n, m) * a.load(l, n, m); }); - Tensor* d = Reduce("sum", {{2, "l1"}}, Sum(), c, {{3, "n1"}, {m, "m1"}}); - LoopNest loop(std::vector({d}), {c, d}); + Tensor d = Reduce("sum", {{2, "l1"}}, Sum(), c, {{3, "n1"}, {m, "m1"}}); + LoopNest loop({d}, {c, d}); loop.prepareForCodegen(); StmtPtr s = loop.root_stmt(); s = IRSimplifier::simplify(s); @@ -626,7 +626,7 @@ TEST(Reductions, SplitReduceAxis) { } std::vector out(16, -1.f); - Tensor* tensor = Reduce("sum", {{16, "m"}}, Sum(), in, {{8, "n"}}); + Tensor tensor = Reduce("sum", {{16, "m"}}, Sum(), in, {{8, "n"}}); LoopNest l({tensor}); std::vector loops = l.getLoopStmtsFor(tensor); LoopNest::splitWithTail(loops[1], 2); @@ -656,7 +656,7 @@ TEST(Reductions, SplitNonReduceAxis) { } } std::vector out(16, -1.f); - Tensor* tensor = Reduce("sum", {{16, "m"}}, Sum(), in, {{8, "n"}}); + Tensor tensor = Reduce("sum", {{16, "m"}}, Sum(), in, {{8, "n"}}); LoopNest l({tensor}); std::vector loops = l.getLoopStmtsFor(tensor); LoopNest::splitWithTail(loops[0], 2); @@ -687,14 +687,14 @@ TEST(Reductions, ReorderedReductionInitializer) { Placeholder in(BufHandle("in", {1, 12, 6}, kFloat)); std::vector in_(12 * 6, 1.f); - Tensor* tensor_ = Reduce("sum", {{1, "k"}, {12, "n"}}, Sum(), in, {{6, "m"}}); + Tensor tensor_ = Reduce("sum", {{1, "k"}, {12, "n"}}, Sum(), in, {{6, "m"}}); LoopNest l_({tensor_}); l_.prepareForCodegen(); StmtPtr s_ = Stmt::clone(l_.root_stmt()); s_ = IRSimplifier::simplify(s_); - Tensor* tensor = Reduce("sum", {{1, "k"}, {12, "n"}}, Sum(), in, {{6, "m"}}); + Tensor tensor = Reduce("sum", {{1, "k"}, {12, "n"}}, Sum(), in, {{6, "m"}}); LoopNest l({tensor}); auto loops = l.getLoopStmtsFor(tensor); @@ -741,10 +741,10 @@ TEST(Reductions, ReduceRfactor) { std::vector out(1, -1.f); - Tensor* c = Reduce("sum", {}, Sum(), b, {{m, "m"}, {n, "n"}}); + Tensor c = Reduce("sum", {}, Sum(), b, {{m, "m"}, {n, "n"}}); LoopNest loop({c}); std::vector loops = loop.getLoopStmtsFor(c); - auto c_body = loop.getAllWritesToBuf(c->buf())[1]; + auto c_body = loop.getAllWritesToBuf(c.buf())[1]; ASSERT_TRUE(loop.rfactor(c_body, loops.at(0))); auto rc = NodeFinder::find(loop.root_stmt()); ASSERT_EQ(rc.size(), 2); @@ -776,10 +776,10 @@ TEST(Reductions, Reduce3DRfactorInner) { std::vector out(1, -1.f); - Tensor* c = Reduce("sum", {}, Sum(), b, {{m, "m"}, {n, "n"}, {k, "k"}}); + Tensor c = Reduce("sum", {}, Sum(), b, {{m, "m"}, {n, "n"}, {k, "k"}}); LoopNest loop({c}); std::vector loops = loop.getLoopStmtsFor(c); - auto c_body = loop.getAllWritesToBuf(c->buf())[1]; + auto c_body = loop.getAllWritesToBuf(c.buf())[1]; ASSERT_FALSE(loop.rfactor(c_body, loops.at(2))); auto rc = NodeFinder::find(loop.root_stmt()); ASSERT_EQ(rc.size(), 1); @@ -811,10 +811,10 @@ TEST(Reductions, Reduce3DRfactorOuter) { std::vector out(1, -1.f); - Tensor* c = Reduce("sum", {}, Sum(), b, {{m, "m"}, {n, "n"}, {k, "k"}}); + Tensor c = Reduce("sum", {}, Sum(), b, {{m, "m"}, {n, "n"}, {k, "k"}}); LoopNest loop({c}); std::vector loops = loop.getLoopStmtsFor(c); - auto c_body = loop.getAllWritesToBuf(c->buf())[1]; + auto c_body = loop.getAllWritesToBuf(c.buf())[1]; ASSERT_TRUE(loop.rfactor(c_body, loops.at(0))); auto rc = NodeFinder::find(loop.root_stmt()); ASSERT_EQ(rc.size(), 2); @@ -837,7 +837,7 @@ TEST(Reductions, ReduceRepeatedInternalRfactor) { std::vector out(1, -1.f); std::vector ref(1, -1.f); - Tensor* c = Reduce( + Tensor c = Reduce( "sum", {}, Sum(), @@ -854,7 +854,7 @@ 
TEST(Reductions, ReduceRepeatedInternalRfactor) { IRSimplifier::simplify(refloop.root_stmt()), {in_, c}); ref_cg.call({in, ref}); - BufPtr tmp_buf = c->buf(); + BufPtr tmp_buf = c.buf(); for (int idx = 0; idx < rfac_number; idx++) { auto reduce = loop.getAllWritesToBuf(tmp_buf)[1]; @@ -890,7 +890,7 @@ TEST(Reductions, ReduceSplitTail) { for (int i = 0; i < 3; ++i) { std::vector out(M, -1.f); - Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); + Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); LoopNest loop({c}); std::vector loops = loop.getLoopStmtsFor(c); LoopNest::splitWithTail(loops[i], 8); @@ -922,7 +922,7 @@ TEST(Reductions, ReduceSplitNoTail) { for (int i = 0; i < 3; ++i) { std::vector out(M, -1.f); - Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); + Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); LoopNest loop({c}); std::vector loops = loop.getLoopStmtsFor(c); LoopNest::splitWithTail(loops[i], 5); @@ -956,7 +956,7 @@ TEST(Reductions, ReduceOverSplitTail) { for (int i = 0; i < 3; ++i) { std::vector out(M, -1.f); - Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); + Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); LoopNest loop({c}); std::vector loops = loop.getLoopStmtsFor(c); LoopNest::splitWithTail(loops[i], 16); @@ -989,7 +989,7 @@ TEST(Reductions, ReduceSplitMask) { for (int i = 0; i < 3; ++i) { std::vector out(M, -1.f); - Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); + Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); LoopNest loop({c}); std::vector loops = loop.getLoopStmtsFor(c); LoopNest::splitWithMask(loops[i], 8); @@ -1021,7 +1021,7 @@ TEST(Reductions, ReduceSplitNoMask) { for (int i = 0; i < 3; ++i) { std::vector out(M, -1.f); - Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); + Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); LoopNest loop({c}); std::vector loops = loop.getLoopStmtsFor(c); LoopNest::splitWithMask(loops[i], 5); @@ -1054,7 +1054,7 @@ TEST(Reductions, ReduceOverSplitMask) { for (int i = 0; i < 3; ++i) { std::vector out(M, -1.f); - Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); + Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); LoopNest loop({c}); std::vector loops = loop.getLoopStmtsFor(c); LoopNest::splitWithMask(loops[i], 16); @@ -1090,16 +1090,16 @@ TEST(Reductions, ReduceSplitRfactor) { std::vector out(M, -1.f); - Tensor* c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); + Tensor c = Reduce("sum", {{M, "m"}}, Sum(), b, {{N, "n"}, {K, "k"}}); LoopNest loop({c}); std::vector loops = loop.getLoopStmtsFor(c); LoopNest::splitWithTail(loops[2], SPLIT_FACTOR); - auto c_body = loop.getAllWritesToBuf(c->buf())[2]; - auto all_loops = loop.getAllLoopNestsWritingToBuf(c->buf()); + auto c_body = loop.getAllWritesToBuf(c.buf())[2]; + auto all_loops = loop.getAllLoopNestsWritingToBuf(c.buf()); ASSERT_TRUE(all_loops.size() == 3 && all_loops.at(2).size() == 3); LoopNest::reorderAxis(all_loops[2][1], all_loops[2][2]); - all_loops = loop.getAllLoopNestsWritingToBuf(c->buf()); + all_loops = loop.getAllLoopNestsWritingToBuf(c.buf()); ASSERT_TRUE(all_loops.size() == 3 && all_loops.at(2).size() == 3); ASSERT_TRUE(loop.rfactor(c_body, all_loops[2][1])); loop.prepareForCodegen(); @@ -1131,7 +1131,7 @@ TEST(Reductions, ReduceOverSplitRfactor) { std::vector out(1, -1.f); - Tensor* c = Reduce("sum", {}, Sum(), 
b, {{N, "n"}, {K, "k"}}); + Tensor c = Reduce("sum", {}, Sum(), b, {{N, "n"}, {K, "k"}}); LoopNest loop({c}); std::vector loops = loop.getLoopStmtsFor(c); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) @@ -1139,9 +1139,9 @@ TEST(Reductions, ReduceOverSplitRfactor) { LoopNest::splitWithTail(loops[1], SPLIT_FACTOR, &i, &t); LoopNest::reorderAxis(loops[0], i); - auto all_loops = loop.getAllLoopNestsWritingToBuf(c->buf()); + auto all_loops = loop.getAllLoopNestsWritingToBuf(c.buf()); ASSERT_TRUE(all_loops.size() == 3 && all_loops.at(1).size() == 3); - auto c_body = loop.getAllWritesToBuf(c->buf())[1]; + auto c_body = loop.getAllWritesToBuf(c.buf())[1]; ASSERT_TRUE(loop.rfactor(c_body, all_loops[1][0])); LoopNest::reorderAxis(all_loops[1][0], all_loops[1][2]); @@ -1182,9 +1182,9 @@ TEST(Reductions, ReduceInlineReduction) { Placeholder a_buf("a", kFloat, {M}); Placeholder b_buf("b", kFloat, {M, N, K}); - Tensor* x = Reduce("x", {{M, "m1"}}, Sum(), b_buf, {{N, "n1"}, {K, "k1"}}); - Tensor* y = Compute("y", {{M, "m2"}}, [&](const VarHandle& m) { - return a_buf.load(m) + x->load(m); + Tensor x = Reduce("x", {{M, "m1"}}, Sum(), b_buf, {{N, "n1"}, {K, "k1"}}); + Tensor y = Compute("y", {{M, "m2"}}, [&](const VarHandle& m) { + return a_buf.load(m) + x.load(m); }); PaddedBuffer a_v(M); @@ -1201,9 +1201,9 @@ TEST(Reductions, ReduceInlineReduction) { } } - LoopNest l1(std::vector({y}), {x, y}); + LoopNest l1({y}, {x, y}); // Cannot inline a reduction computation - ASSERT_FALSE(l1.computeInline(x->buf())); + ASSERT_FALSE(l1.computeInline(x.buf())); } TEST(Reductions, ReduceInlineConsumer) { @@ -1215,13 +1215,13 @@ TEST(Reductions, ReduceInlineConsumer) { Placeholder a_buf("a", kFloat, {M, N, K}); Placeholder b_buf("b", kFloat, {M, N, K}); - Tensor* x = Compute( + Tensor x = Compute( "x", {{M, "m1"}, {N, "n1"}, {K, "k1"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { return a_buf.load(m, n, k) + b_buf.load(m, n, k); }); - Tensor* y = Reduce("y", {{M, "m2"}}, Sum(), x, {{N, "n2"}, {K, "k2"}}); + Tensor y = Reduce("y", {{M, "m2"}}, Sum(), x, {{N, "n2"}, {K, "k2"}}); PaddedBuffer a_v(M, N, K); PaddedBuffer b_v(M, N, K); @@ -1235,9 +1235,9 @@ TEST(Reductions, ReduceInlineConsumer) { } } - LoopNest l1(std::vector({y}), {x, y}); + LoopNest l1({y}, {x, y}); LoopNest l2(l1); - l2.computeInline(x->buf()); + l2.computeInline(x.buf()); l1.prepareForCodegen(); l2.prepareForCodegen(); @@ -1269,7 +1269,7 @@ TEST(Reductions, ReduceInlineReducerInternal) { Placeholder a_buf("a", kFloat, {M, N, K}); Placeholder b_buf("b", kFloat, {M, N, K}); - Tensor* x = Compute( + Tensor x = Compute( "x", {{M, "m1"}, {N, "n1"}, {K, "k1"}}, [&](const VarHandle& m, const VarHandle& n, const VarHandle& k) { @@ -1279,7 +1279,7 @@ TEST(Reductions, ReduceInlineReducerInternal) { Reducer minimum(ExprHandle(0.f), [&](ExprHandle a, ExprHandle b) { return Add::make(ExprHandle(1.f), Min::make(a, b, false)); }); - Tensor* y = Reduce("y", {{M, "m2"}}, minimum, x, {{N, "n2"}, {K, "k2"}}); + Tensor y = Reduce("y", {{M, "m2"}}, minimum, x, {{N, "n2"}, {K, "k2"}}); PaddedBuffer a_v(M, N, K); PaddedBuffer b_v(M, N, K); @@ -1293,9 +1293,9 @@ TEST(Reductions, ReduceInlineReducerInternal) { } } - LoopNest l1(std::vector({y}), {x, y}); + LoopNest l1({y}, {x, y}); LoopNest l2(l1); - l2.computeInline(x->buf()); + l2.computeInline(x.buf()); l1.prepareForCodegen(); l2.prepareForCodegen(); @@ -1328,25 +1328,25 @@ TEST(Reductions, ReductionCacheAccessesOperatorAxis) { Placeholder a(BufHandle("a", {L, N, M}, kFloat)); Placeholder 
b(BufHandle("b", {L, N, M}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "scale", {{L, "l2"}, {N, "n1"}, {M, "m1"}}, [&](const VarHandle& l, const VarHandle& n, const VarHandle& m) { return b.load(l, n, m) * a.load(l, n, m); }); - Tensor* d = Reduce("sum", {{L, "l1"}}, Sum(), c, {{N, "n1"}, {M, "m1"}}); + Tensor d = Reduce("sum", {{L, "l1"}}, Sum(), c, {{N, "n1"}, {M, "m1"}}); - Tensor* e = Compute("scale", {{L, "l"}}, [&](const VarHandle& l) { - return b.load(0, 0, l) * d->load(l); + Tensor e = Compute("scale", {{L, "l"}}, [&](const VarHandle& l) { + return b.load(0, 0, l) * d.load(l); }); - LoopNest l(std::vector({e}), {c, d, e}); + LoopNest l({e}, {c, d, e}); LoopNest l_before(l); l_before.prepareForCodegen(); SimpleIREvaluator cg_before(l_before.root_stmt(), {a, b, e}); StmtPtr d_loop = l.getLoopStmtsFor(d)[0]; - l.cacheAccesses(d->buf(), "d_local", d_loop); + l.cacheAccesses(d.buf(), "d_local", d_loop); l.prepareForCodegen(); StmtPtr result = IRSimplifier::simplify(l.root_stmt()); @@ -1405,25 +1405,25 @@ TEST(Reductions, ReductionCacheAccessesOuterReduceAxis) { Placeholder a(BufHandle("a", {L, N, M}, kFloat)); Placeholder b(BufHandle("b", {L, N, M}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "scale", {{L, "l2"}, {N, "n1"}, {M, "m1"}}, [&](const VarHandle& l, const VarHandle& n, const VarHandle& m) { return b.load(l, n, m) * a.load(l, n, m); }); - Tensor* d = Reduce("sum", {{L, "l1"}}, Sum(), c, {{N, "n1"}, {M, "m1"}}); + Tensor d = Reduce("sum", {{L, "l1"}}, Sum(), c, {{N, "n1"}, {M, "m1"}}); - Tensor* e = Compute("scale", {{L, "l"}}, [&](const VarHandle& l) { - return b.load(0, 0, l) * d->load(l); + Tensor e = Compute("scale", {{L, "l"}}, [&](const VarHandle& l) { + return b.load(0, 0, l) * d.load(l); }); - LoopNest l(std::vector({e}), {c, d, e}); + LoopNest l({e}, {c, d, e}); LoopNest l_before(l); l_before.prepareForCodegen(); SimpleIREvaluator cg_before(l_before.root_stmt(), {a, b, e}); StmtPtr d_loop = l.getLoopStmtsFor(d)[1]; - l.cacheAccesses(d->buf(), "d_local", d_loop); + l.cacheAccesses(d.buf(), "d_local", d_loop); l.prepareForCodegen(); StmtPtr result = IRSimplifier::simplify(l.root_stmt()); @@ -1480,25 +1480,25 @@ TEST(Reductions, ReductionCacheAccessesInnerReduceAxis) { Placeholder a(BufHandle("a", {L, N, M}, kFloat)); Placeholder b(BufHandle("b", {L, N, M}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "scale", {{L, "l2"}, {N, "n1"}, {M, "m1"}}, [&](const VarHandle& l, const VarHandle& n, const VarHandle& m) { return b.load(l, n, m) * a.load(l, n, m); }); - Tensor* d = Reduce("sum", {{L, "l1"}}, Sum(), c, {{N, "n1"}, {M, "m1"}}); + Tensor d = Reduce("sum", {{L, "l1"}}, Sum(), c, {{N, "n1"}, {M, "m1"}}); - Tensor* e = Compute("scale", {{L, "l"}}, [&](const VarHandle& l) { - return b.load(0, 0, l) * d->load(l); + Tensor e = Compute("scale", {{L, "l"}}, [&](const VarHandle& l) { + return b.load(0, 0, l) * d.load(l); }); - LoopNest l(std::vector({e}), {c, d, e}); + LoopNest l({e}, {c, d, e}); LoopNest l_before(l); l_before.prepareForCodegen(); SimpleIREvaluator cg_before(l_before.root_stmt(), {a, b, e}); StmtPtr d_loop = l.getLoopStmtsFor(d)[2]; - l.cacheAccesses(d->buf(), "d_local", d_loop); + l.cacheAccesses(d.buf(), "d_local", d_loop); l.prepareForCodegen(); StmtPtr result = IRSimplifier::simplify(l.root_stmt()); @@ -1551,22 +1551,22 @@ TEST(Reductions, ReductionCacheBodyAccess) { Placeholder a(BufHandle("a", {24, 32, 12}, kFloat)); Placeholder b(BufHandle("b", {24, 32, 12}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "scale", 
{{24, "l2"}, {32, "n1"}, {12, "m1"}}, [&](const VarHandle& l, const VarHandle& n, const VarHandle& m) { return b.load(l, n, m) * a.load(l, n, m); }); - Tensor* d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}}); + Tensor d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}}); - Tensor* e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) { - return b.load(0, 0, l) * d->load(l); + Tensor e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) { + return b.load(0, 0, l) * d.load(l); }); - LoopNest l(std::vector({e}), {c, d, e}); + LoopNest l({e}, {c, d, e}); StmtPtr d_loop = l.getLoopStmtsFor(d)[1]; - l.cacheAccesses(c->buf(), "scale_local", d_loop); + l.cacheAccesses(c.buf(), "scale_local", d_loop); l.prepareForCodegen(); StmtPtr result = IRSimplifier::simplify(l.root_stmt()); @@ -1592,24 +1592,24 @@ TEST(Reductions, ReductionCacheConsumerAccess) { Placeholder a(BufHandle("a", {24, 32, 12}, kFloat)); Placeholder b(BufHandle("b", {24, 32, 12}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "scale", {{24, "l2"}, {32, "n1"}, {12, "m1"}}, [&](const VarHandle& l, const VarHandle& n, const VarHandle& m) { return b.load(l, n, m) * a.load(l, n, m); }); - Tensor* d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}}); + Tensor d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}}); - Tensor* e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) { - return b.load(0, 0, l) * d->load(l); + Tensor e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) { + return b.load(0, 0, l) * d.load(l); }); - LoopNest l(std::vector({e}), {c, d, e}); + LoopNest l({e}, {c, d, e}); LoopNest::splitWithMask(l.getLoopStmtsFor(e)[0], 4); StmtPtr e_loop = l.getLoopStmtsFor(e)[1]; - l.cacheAccesses(d->buf(), "sum_local", e_loop); + l.cacheAccesses(d.buf(), "sum_local", e_loop); l.prepareForCodegen(); StmtPtr result = IRSimplifier::simplify(l.root_stmt()); @@ -1633,19 +1633,19 @@ TEST(Reductions, ReductionSplitCacheConsumerAccess) { Placeholder a(BufHandle("a", {24, 32, 12}, kFloat)); Placeholder b(BufHandle("b", {24, 32, 12}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "scale", {{24, "l2"}, {32, "n1"}, {12, "m1"}}, [&](const VarHandle& l, const VarHandle& n, const VarHandle& m) { return b.load(l, n, m) * a.load(l, n, m); }); - Tensor* d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}}); + Tensor d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}}); - Tensor* e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) { - return b.load(0, 0, l) * d->load(l); + Tensor e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) { + return b.load(0, 0, l) * d.load(l); }); - LoopNest l(std::vector({e}), {c, d, e}); + LoopNest l({e}, {c, d, e}); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr inner; @@ -1656,7 +1656,7 @@ TEST(Reductions, ReductionSplitCacheConsumerAccess) { // Split reduction consumer. 
LoopNest::splitWithMask(l.getLoopStmtsFor(e)[0], 4, &inner); - l.cacheAccesses(d->buf(), "sum_local", inner); + l.cacheAccesses(d.buf(), "sum_local", inner); l.prepareForCodegen(); StmtPtr result = IRSimplifier::simplify(l.root_stmt()); @@ -1681,19 +1681,19 @@ TEST(Reductions, ReductionReorderCacheConsumerAccess) { Placeholder a(BufHandle("a", {24, 32, 12}, kFloat)); Placeholder b(BufHandle("b", {24, 32, 12}, kFloat)); - Tensor* c = Compute( + Tensor c = Compute( "scale", {{24, "l2"}, {32, "n1"}, {12, "m1"}}, [&](const VarHandle& l, const VarHandle& n, const VarHandle& m) { return b.load(l, n, m) * a.load(l, n, m); }); - Tensor* d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}}); + Tensor d = Reduce("sum", {{24, "l1"}}, Sum(), c, {{32, "n1"}, {12, "m1"}}); - Tensor* e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) { - return b.load(0, 0, l) * d->load(l); + Tensor e = Compute("scale", {{24, "l"}}, [&](const VarHandle& l) { + return b.load(0, 0, l) * d.load(l); }); - LoopNest l(std::vector({e}), {c, d, e}); + LoopNest l({e}, {c, d, e}); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr inner; @@ -1705,7 +1705,7 @@ TEST(Reductions, ReductionReorderCacheConsumerAccess) { // Split reduction consumer. LoopNest::splitWithMask(l.getLoopStmtsFor(e)[0], 4, &inner); - l.cacheAccesses(d->buf(), "sum_local", inner); + l.cacheAccesses(d.buf(), "sum_local", inner); l.prepareForCodegen(); StmtPtr result = IRSimplifier::simplify(l.root_stmt()); @@ -1742,13 +1742,13 @@ TEST(Reductions, ReductionRfactorCacheTempOuter) { std::vector out(1, -1.f); - Tensor* c = Reduce("sum", {}, Sum(), b, {{m, "a"}, {n, "b"}, {k, "c"}}); + Tensor c = Reduce("sum", {}, Sum(), b, {{m, "a"}, {n, "b"}, {k, "c"}}); LoopNest loop({c}); std::vector loops = loop.getLoopStmtsFor(c); LoopNest::reorderAxis(loops.at(0), loops.at(1)); loops = loop.getLoopStmtsFor(c); - auto c_body = loop.getAllWritesToBuf(c->buf())[1]; + auto c_body = loop.getAllWritesToBuf(c.buf())[1]; // NOLINTNEXTLINE(cppcoreguidelines-init-variables) BufPtr rfac_buf; ASSERT_TRUE(loop.rfactor(c_body, loops.at(0), &rfac_buf)); @@ -1811,10 +1811,10 @@ TEST(Reductions, ReductionRfactorCacheTempInner) { std::vector out(1, -1.f); - Tensor* c = Reduce("sum", {}, Sum(), b, {{m, "a"}, {n, "b"}, {k, "c"}}); + Tensor c = Reduce("sum", {}, Sum(), b, {{m, "a"}, {n, "b"}, {k, "c"}}); LoopNest loop({c}); std::vector loops = loop.getLoopStmtsFor(c); - auto c_body = loop.getAllWritesToBuf(c->buf())[1]; + auto c_body = loop.getAllWritesToBuf(c.buf())[1]; LoopNest::reorderAxis(loops.at(0), loops.at(1)); loops = loop.getLoopStmtsFor(c); @@ -1871,7 +1871,7 @@ TEST(Reductions, ReductionVectorize) { Placeholder in(BufHandle("in", {8, 8}, kFloat)); - Tensor* tensor = Reduce("sum", {{8, "m"}}, Sum(), in, {{8, "n"}}); + Tensor tensor = Reduce("sum", {{8, "m"}}, Sum(), in, {{8, "n"}}); LoopNest l_before({tensor}); LoopNest l(l_before); l_before.prepareForCodegen(); @@ -1909,7 +1909,7 @@ TEST(Reductions, ReductionVectorizeInner) { Placeholder in(BufHandle("in", {8, 8}, kFloat)); - Tensor* tensor = Reduce("sum", {{8, "m"}}, Sum(), in, {{8, "n"}}); + Tensor tensor = Reduce("sum", {{8, "m"}}, Sum(), in, {{8, "n"}}); LoopNest l({tensor}); ASSERT_FALSE(LoopNest::vectorize(l.getLoopStmtsFor(tensor)[1])); @@ -1929,7 +1929,7 @@ TEST(Reductions, ReductionVectorizeRfactor) { Placeholder in(BufHandle("in", {8, 8}, kFloat)); - Tensor* tensor = Reduce("sum", {}, Sum(), in, {{8, "m"}, {8, "n"}}); + Tensor tensor = Reduce("sum", {}, Sum(), in, {{8, "m"}, {8, "n"}}); LoopNest 
l_before({tensor}); LoopNest l(l_before); @@ -1944,7 +1944,7 @@ TEST(Reductions, ReductionVectorizeRfactor) { std::vector loops = l.getLoopStmtsFor(tensor); LoopNest::reorderAxis(loops[0], loops[1]); loops = l.getLoopStmtsFor(tensor); - auto tensor_body = l.getAllWritesToBuf(tensor->buf())[1]; + auto tensor_body = l.getAllWritesToBuf(tensor.buf())[1]; BufPtr rfac_buf = nullptr; ASSERT_TRUE(LoopNest::rfactor(tensor_body, loops.at(0), &rfac_buf)); @@ -1988,7 +1988,7 @@ TEST(Reductions, InitFunction) { constexpr int N = 16; Placeholder A("A", kFloat, {M, N}); Placeholder B("B", kFloat, {N}); - Tensor* C = Reduce( + Tensor C = Reduce( "C", {{N, "n"}}, Sum(), diff --git a/test/cpp/tensorexpr/test_simplify.cpp b/test/cpp/tensorexpr/test_simplify.cpp index c25ae4f..0df9e92 100644 --- a/test/cpp/tensorexpr/test_simplify.cpp +++ b/test/cpp/tensorexpr/test_simplify.cpp @@ -3932,7 +3932,7 @@ TEST(Simplify, SimplifyForCleansUp) { { Placeholder a("a", kFloat, {1, 12, 1}); VarHandle x("x", kInt); - Tensor* b = Compute( + Tensor b = Compute( // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) "x", {{1, "i"}, {12, "m"}, {1, "n"}}, diff --git a/test/cpp/tensorexpr/tutorial.cpp b/test/cpp/tensorexpr/tutorial.cpp index 5a6f257..0f0277e 100644 --- a/test/cpp/tensorexpr/tutorial.cpp +++ b/test/cpp/tensorexpr/tutorial.cpp @@ -155,8 +155,8 @@ int main(int argc, char* argv[]) { ExprPtr body = alloc(i, j); // Finally, we pass all these pieces together to Tensor constructor: - Tensor* X = new Tensor(buf, args, body); - std::cout << "Tensor computation: " << *X << std::endl; + Tensor X = Tensor(buf, args, body); + std::cout << "Tensor computation: " << X << std::endl; // Prints: // Tensor computation: Tensor X[64, 32]: // for (int i = 0; i < 64; i++) { @@ -171,11 +171,11 @@ int main(int argc, char* argv[]) { // constructing Exprs, Tensors also have a more convenient API for // construction. It is based on Compute API, which takes a name, // dimensions, and a lambda specifying the computation body: - Tensor* Z = Compute( + Tensor Z = Compute( "Z", {{64, "i"}, {32, "j"}}, [](const VarHandle& i, const VarHandle& j) { return i / j; }); - std::cout << "Tensor computation: " << *Z << std::endl; + std::cout << "Tensor computation: " << Z << std::endl; // Prints: // Tensor computation: Tensor Z[64, 32]: // for (int i = 0; i < 64; i++) { @@ -187,13 +187,13 @@ int main(int argc, char* argv[]) { // Tensors might access other tensors and external placeholders in their // expressions. It can be done like so: Placeholder P("P", kInt, {64, 32}); - Tensor* R = Compute( + Tensor R = Compute( "R", {{64, "i"}, {32, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return Z->load(i, j) * P.load(i, j); + return Z.load(i, j) * P.load(i, j); }); - std::cout << "Tensor computation: " << *R << std::endl; + std::cout << "Tensor computation: " << R << std::endl; // Prints: // Tensor computation: Tensor R[64, 32]: // for (int i = 0; i < 64; i++) { @@ -224,20 +224,20 @@ int main(int argc, char* argv[]) { // Let's create a simple tensor expression and construct a loop nest for it. 
Placeholder A("A", kFloat, {64, 32}); Placeholder B("B", kFloat, {64, 32}); - Tensor* X = Compute( + Tensor X = Compute( "X", {{64, "i"}, {32, "j"}}, [&](const VarHandle& i, const VarHandle& j) { return A.load(i, j) + B.load(i, j); }); - Tensor* Y = Compute( + Tensor Y = Compute( "Y", {{64, "i"}, {32, "j"}}, [&](const VarHandle& i, const VarHandle& j) { - return sigmoid(X->load(i, j)); + return sigmoid(X.load(i, j)); }); - std::cout << "Tensor computation X: " << *X - << "Tensor computation Y: " << *Y << std::endl; + std::cout << "Tensor computation X: " << X << "Tensor computation Y: " << Y + << std::endl; // Prints: // Tensor computation X: Tensor X[64, 32]: // for (int i = 0; i < 64; i++) { @@ -256,9 +256,7 @@ int main(int argc, char* argv[]) { // Creating a loop nest is as quite simple, we just need to specify a list // of all and a list of output tensors: // NOLINTNEXTLINE(bugprone-argument-comment) - std::vector outputs = {Y}; - std::vector all = {X, Y}; - LoopNest loopnest(outputs, all); + LoopNest loopnest(/*outputs=*/{Y}, /*all=*/{X, Y}); // An IR used in LoopNest is based on tensor statements, represented by // `Stmt` class. Statements are used to specify the loop nest structure, and @@ -357,7 +355,7 @@ int main(int argc, char* argv[]) { // Let's start by constructing a simple computation for us to work with: Placeholder A("A", kInt, {64, 32}); Placeholder B("B", kInt, {64, 32}); - Tensor* X = Compute( + Tensor X = Compute( "X", {{64, "i"}, {32, "j"}}, [&](const VarHandle& i, const VarHandle& j) { diff --git a/torch/csrc/jit/runtime/static/ops.cpp b/torch/csrc/jit/runtime/static/ops.cpp index 27f6e54..0a34f47 100644 --- a/torch/csrc/jit/runtime/static/ops.cpp +++ b/torch/csrc/jit/runtime/static/ops.cpp @@ -500,7 +500,7 @@ struct TEWrapper { void optimizePointwise( tensorexpr::LoopNest* ln, - tensorexpr::Tensor* target, + tensorexpr::Tensor target, int width) { using namespace torch::jit::tensorexpr; std::vector loops = ln->getLoopStmtsFor(target); @@ -513,7 +513,7 @@ void optimizePointwise( std::shared_ptr wrapTECompute( std::shared_ptr wrap, tensorexpr::Placeholder& in, - tensorexpr::Tensor* out, + tensorexpr::Tensor out, tensorexpr::VarHandle& dim, int width = kVectorWidth) { using namespace torch::jit::tensorexpr; @@ -553,7 +553,7 @@ struct TEWrapper { std::shared_ptr wrapTECompute( std::shared_ptr wrap, tensorexpr::Placeholder& in, - tensorexpr::Tensor* out, + tensorexpr::Tensor out, tensorexpr::VarHandle& dim, int width = kVectorWidth) { return wrap; @@ -593,7 +593,7 @@ std::shared_ptr createLogit(c10::optional clamp) { auto wrap = std::make_shared(); auto N = VarHandle("N", kInt); Placeholder A("A", kFloat, {N}); - tensorexpr::Tensor* B = Compute("B", {N}, [&](const VarHandle& i) { + tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) { auto A_elem = [&]() { if (!clamp) { return A.load(i); @@ -619,7 +619,7 @@ std::shared_ptr createRelu() { wrap = std::make_shared(); auto N = VarHandle("N", kInt); Placeholder A("A", kFloat, {N}); - tensorexpr::Tensor* B = Compute("B", {N}, [&](const VarHandle& i) { + tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) { auto zero = FloatImm::make(0.f); auto a = A.load(i); return ifThenElse(a < zero, zero, a); @@ -638,7 +638,7 @@ std::shared_ptr createTanh() { wrap = std::make_shared(); auto N = VarHandle("N", kInt); Placeholder A("A", kFloat, {N}); - tensorexpr::Tensor* B = Compute("B", {N}, [&](const VarHandle& i) { + tensorexpr::Tensor B = Compute("B", {N}, [&](const VarHandle& i) { auto a = A.load(i); return 
fast_tanh(a); }); @@ -656,7 +656,7 @@ std::shared_ptr createSigmoid() { wrap = std::make_shared(); auto N = VarHandle("N", kInt); Placeholder A("A", kFloat, {N}); - Tensor* B = + Tensor B = Compute("B", {N}, [&](const VarHandle& i) { return sigmoid(A.load(i)); }); // NNC uses sleef for vectorizing sigmoid, which comes in an 8-wide flavor // (Sleef_expf8). diff --git a/torch/csrc/jit/tensorexpr/codegen.h b/torch/csrc/jit/tensorexpr/codegen.h index 77ba8e1..d7cfe78 100644 --- a/torch/csrc/jit/tensorexpr/codegen.h +++ b/torch/csrc/jit/tensorexpr/codegen.h @@ -104,7 +104,7 @@ class TORCH_API CodeGen { class CodeGen::BufferArg { public: BufferArg(const Placeholder& buffer) : buf_(buffer.data()) {} - BufferArg(Tensor* tensor) : buf_(tensor->buf()) {} + BufferArg(Tensor tensor) : buf_(tensor.buf()) {} BufferArg(const VarHandle& var) : var_(var.node()), isVar_(true) {} BufferArg(const BufHandle& buf) : buf_(buf.node()) {} diff --git a/torch/csrc/jit/tensorexpr/ir_printer.cpp b/torch/csrc/jit/tensorexpr/ir_printer.cpp index f885246..27b56e2 100644 --- a/torch/csrc/jit/tensorexpr/ir_printer.cpp +++ b/torch/csrc/jit/tensorexpr/ir_printer.cpp @@ -545,7 +545,7 @@ std::ostream& operator<<(std::ostream& stream, const Stmt& stmt) { } std::ostream& operator<<(std::ostream& stream, const Tensor& t) { - stream << std::to_string(&t); + stream << std::to_string(t); return stream; } @@ -568,7 +568,7 @@ void print(StmtPtr stmt) { } } -void print(const Tensor* t) { +void print(const Tensor& t) { std::cout << std::to_string(t); } @@ -589,20 +589,17 @@ std::string to_string(StmtPtr stmt) { return oss.str(); } -std::string to_string(const Tensor* t) { - if (!t) { - return "(null tensor)\n"; - } +std::string to_string(const Tensor& t) { std::ostringstream oss; // TODO: move this to Buf printer - oss << "Tensor " << t->buf()->name_hint() << "["; - for (const auto i : c10::irange(t->buf()->ndim())) { + oss << "Tensor " << t.buf()->name_hint() << "["; + for (const auto i : c10::irange(t.buf()->ndim())) { if (i != 0) { oss << ", "; } - oss << *t->buf()->dim(i); + oss << *t.buf()->dim(i); } - oss << "]:\n" << *t->stmt() << "\n"; + oss << "]:\n" << *t.stmt() << "\n"; return oss.str(); } } // namespace std diff --git a/torch/csrc/jit/tensorexpr/ir_printer.h b/torch/csrc/jit/tensorexpr/ir_printer.h index e76dcca..321d1ef 100644 --- a/torch/csrc/jit/tensorexpr/ir_printer.h +++ b/torch/csrc/jit/tensorexpr/ir_printer.h @@ -103,7 +103,7 @@ TORCH_API std::ostream& operator<<(std::ostream& stream, const Tensor&); TORCH_API void print(ExprPtr expr); TORCH_API void print(StmtPtr stmt); -TORCH_API void print(const Tensor* t); +TORCH_API void print(const Tensor& t); } // namespace tensorexpr } // namespace jit @@ -119,5 +119,5 @@ using torch::jit::tensorexpr::Tensor; TORCH_API std::string to_string(ExprPtr expr); TORCH_API std::string to_string(StmtPtr stmt); -TORCH_API std::string to_string(const Tensor* t); +TORCH_API std::string to_string(const Tensor& t); } // namespace std diff --git a/torch/csrc/jit/tensorexpr/kernel.cpp b/torch/csrc/jit/tensorexpr/kernel.cpp index faacd02..8076ba2 100644 --- a/torch/csrc/jit/tensorexpr/kernel.cpp +++ b/torch/csrc/jit/tensorexpr/kernel.cpp @@ -960,7 +960,7 @@ std::vector TensorExprKernel::broadcastShapesMut( return res.first; } -Tensor* computeOneOperand( +Tensor computeOneOperand( const std::string& name, const std::vector& inputValues, const std::vector& outputShape, @@ -981,7 +981,7 @@ Tensor* computeOneOperand( }); } -Tensor* computeTwoOperand( +Tensor computeTwoOperand( const std::string& 
name, const std::vector& inputValues, const std::vector& outputShape, @@ -1004,7 +1004,7 @@ Tensor* computeTwoOperand( }); } -Tensor* computeTwoOperandWithAlpha( +Tensor computeTwoOperandWithAlpha( const std::string& name, const std::vector& inputValues, const std::vector& outputShape, @@ -1028,7 +1028,7 @@ Tensor* computeTwoOperandWithAlpha( }); } -Tensor* computeConditionWithTwoOperand( +Tensor computeConditionWithTwoOperand( const std::string& name, const std::vector& inputValues, const std::vector& outputShape, @@ -1055,7 +1055,7 @@ Tensor* computeConditionWithTwoOperand( }); } -Tensor* computeThreeOperand( +Tensor computeThreeOperand( const std::string& name, const std::vector& inputValues, const std::vector& outputShape, @@ -1083,7 +1083,7 @@ Tensor* computeThreeOperand( return demoteOutput(compute, outputType); }); } -Tensor* computeFourOperand( +Tensor computeFourOperand( const std::string& name, const std::vector& inputValues, const std::vector& outputShape, @@ -1135,7 +1135,7 @@ std::pair> processCatList( } return {highType, nonEmptyInputs}; } -Tensor* computeCatWoConditionals( +Tensor computeCatWoConditionals( const std::vector& inputs, const std::vector& outputShape) { // NOLINTNEXTLINE(performance-unnecessary-copy-initialization) @@ -1164,7 +1164,7 @@ Tensor* computeCatWoConditionals( auto output_buf = alloc("aten_cat", output_sizes_expr, ToDtype(high_type)); if (non_empty_inputs.size() == 0) { - return new Tensor( + return Tensor( output_buf, alloc(std::vector({}))); } @@ -1213,10 +1213,10 @@ Tensor* computeCatWoConditionals( concat_dim_size = alloc(concat_dim_size, input_dims[norm_concat_dim].node()); } - return new Tensor(output_buf, IRSimplifier::simplify(block)); + return Tensor(output_buf, IRSimplifier::simplify(block)); } -Tensor* computeCat( +Tensor computeCat( const std::vector& inputs, const std::vector& outputShape, at::Device device) { @@ -1276,7 +1276,7 @@ Tensor* computeCat( }); } -Tensor* computeConv2d( +Tensor computeConv2d( const std::vector& inputs, const std::vector& outputShape, const c10::optional& outputType) { @@ -1319,10 +1319,10 @@ Tensor* computeConv2d( dilation[0], dilation[1], groups}); - return new Tensor(ResultBuf.node(), s); + return Tensor(ResultBuf.node(), s); } -Tensor* tensorexpr::computeOperandValue( +Tensor tensorexpr::computeOperandValue( c10::Symbol op, const std::vector& inputs, const std::vector& outputShape, @@ -2391,7 +2391,7 @@ c10::optional findDtypeForValue(const torch::jit::Value* v) { return c10::nullopt; } -Tensor* TensorExprKernel::computeValue(const torch::jit::Value* v) { +Tensor TensorExprKernel::computeValue(const torch::jit::Value* v) { auto inputs = v->node()->inputs(); auto op = v->node()->kind(); @@ -2703,9 +2703,9 @@ static std::vector toExprHandles(const std::vector& sizes) { return dims; } -Tensor* TensorExprKernel::bindInput(const torch::jit::Value* input) { +Tensor TensorExprKernel::bindInput(const torch::jit::Value* input) { auto const& t = input->type(); - Tensor* result = nullptr; + Tensor result(nullptr, nullptr); switch (t->kind()) { case TypeKind::TensorType: { auto tt = input->type()->cast(); @@ -2744,7 +2744,7 @@ Tensor* TensorExprKernel::bindInput(const torch::jit::Value* input) { } return inBuffer.load(idx); }); - bufs_.emplace(input, result->buf()); + bufs_.emplace(input, result.buf()); bufferArgs_.emplace_back(inBuffer); break; @@ -2800,7 +2800,7 @@ bool denseAndNonOverlapping( return (strides == at::infer_dense_strides(sizes, strides)); } -Tensor* 
TensorExprKernel::convertOutputToCorrectStrides(torch::jit::Value* v) { +Tensor TensorExprKernel::convertOutputToCorrectStrides(torch::jit::Value* v) { const TensorTypePtr& tt = v->type()->expect(); TORCH_INTERNAL_ASSERT(bufs_.count(v)); BufPtr buf = bufs_.at(v); @@ -2816,19 +2816,19 @@ Tensor* TensorExprKernel::convertOutputToCorrectStrides(torch::jit::Value* v) { auto sizes = *tt->sizes().concrete_sizes(); std::vector default_strides = TensorType::contiguousStridesOf(sizes); if (!tt->strides().concrete_sizes()) { - return new Tensor(buf, nullptr); + return Tensor(buf, nullptr); } TORCH_INTERNAL_ASSERT(tt->strides().concrete_sizes()); const std::vector strides = *tt->strides().concrete_sizes(); // All Tensors in NNC are layed out in default, contiguous layout. // If the output is also default contiguous we don't need to do anything if (strides == default_strides) { - return new Tensor(buf, nullptr); + return Tensor(buf, nullptr); } // If the tensor is not dense or overlaps, we have // no way of matching the profiled striding if (!denseAndNonOverlapping(sizes, strides)) { - return new Tensor(buf, nullptr); + return Tensor(buf, nullptr); } auto dims = c10::fmap(sizesForValue(v)); @@ -2922,8 +2922,9 @@ void TensorExprKernel::compile() { nInputs_ = graph_->inputs().size(); genInputDebugNames(); for (auto const& input : graph_->inputs()) { - if (Tensor* t = bindInput(input)) { - block->append_stmt(t->stmt()); + Tensor t = bindInput(input); + if (t.stmt()) { + block->append_stmt(t.stmt()); } } @@ -2937,10 +2938,9 @@ void TensorExprKernel::compile() { } else { for (auto const& output : n->outputs()) { if (output->hasUses()) { - Tensor* t = computeValue(output); - bufs_.emplace(output, t->buf()); - // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) - block->append_stmt(t->stmt()); + Tensor t = computeValue(output); + bufs_.emplace(output, t.buf()); + block->append_stmt(t.stmt()); } } } @@ -2958,12 +2958,12 @@ void TensorExprKernel::compile() { // The "strided" tensor will be incorrect if used in NNC, // since NNC views it as contiguous. 
Only convert it to the right // strides at the end of the kernel (if already contiguous it's a no-op) - Tensor* properly_strided_output = convertOutputToCorrectStrides(output); - if (properly_strided_output->stmt()) { - block->append_stmt(properly_strided_output->stmt()); + Tensor properly_strided_output = convertOutputToCorrectStrides(output); + if (properly_strided_output.stmt()) { + block->append_stmt(properly_strided_output.stmt()); } // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) - bufs_[output] = properly_strided_output->buf(); + bufs_[output] = properly_strided_output.buf(); const auto& tt = output->type()->expect(); auto sizes = *tt->sizes().concrete_sizes(); tensorOutputSizes_.push_back(sizes); diff --git a/torch/csrc/jit/tensorexpr/kernel.h b/torch/csrc/jit/tensorexpr/kernel.h index 7b35e1e..a8a57b9 100644 --- a/torch/csrc/jit/tensorexpr/kernel.h +++ b/torch/csrc/jit/tensorexpr/kernel.h @@ -47,7 +47,7 @@ using ArgValue = c10::variant< IntList, ArgNone>; -using NNCLoweringFunction = std::function&, const std::vector&, const c10::optional&, @@ -123,7 +123,7 @@ struct TensorInfo { c10::ScalarType dtype; }; -TORCH_API Tensor* computeOperandValue( +TORCH_API Tensor computeOperandValue( c10::Symbol op, const std::vector& inputs, const std::vector& outputShape, @@ -209,7 +209,7 @@ class TORCH_API TensorExprKernel { const torch::jit::Value* v, const std::vector& axes); - Tensor* computeValue(const torch::jit::Value* v); + Tensor computeValue(const torch::jit::Value* v); void bindConstant(const torch::jit::Value* v); @@ -222,9 +222,9 @@ class TORCH_API TensorExprKernel { std::vector& outputs); BackendType inferBackendTypeFromDevice(at::Device device); - Tensor* bindInput(const torch::jit::Value* input); + Tensor bindInput(const torch::jit::Value* input); - Tensor* convertOutputToCorrectStrides(torch::jit::Value* v); + Tensor convertOutputToCorrectStrides(torch::jit::Value* v); // Captures the information for reduction operation nodes. 
struct ReductionInfo { diff --git a/torch/csrc/jit/tensorexpr/loopnest.cpp b/torch/csrc/jit/tensorexpr/loopnest.cpp index d9d2073..1904999 100644 --- a/torch/csrc/jit/tensorexpr/loopnest.cpp +++ b/torch/csrc/jit/tensorexpr/loopnest.cpp @@ -47,14 +47,14 @@ LoopNest::LoopNest(StmtPtr stmt, std::unordered_set output_bufs) // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) LoopNest::LoopNest( - const std::vector& output_tensors, - const std::vector& tensors_to_compute) { + const std::vector& output_tensors, + const std::vector& tensors_to_compute) { initialize(output_tensors, tensors_to_compute); verify(root_stmt_); } // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) -LoopNest::LoopNest(const std::vector& output_tensors) { +LoopNest::LoopNest(const std::vector& output_tensors) { initialize(output_tensors, output_tensors); verify(root_stmt_); } @@ -486,15 +486,15 @@ bool LoopNest::vectorize(ForPtr f) { } void LoopNest::initialize( - const std::vector& output_tensors, - const std::vector& tensors_to_compute) { + const std::vector& output_tensors, + const std::vector& tensors_to_compute) { for (auto t : output_tensors) { - output_bufs_.insert(t->buf()); + output_bufs_.insert(t.buf()); } std::vector loops; - for (Tensor* t : tensors_to_compute) { - StmtPtr loop = t->stmt(); + for (Tensor t : tensors_to_compute) { + StmtPtr loop = t.stmt(); if (loop->get_parent()) { std::cerr << "Error: creating a loopnest from already used Tensors\n"; loops = {}; @@ -2384,7 +2384,7 @@ void LoopNest::compressAllBuffers(StmtPtr stmt) { } } -std::vector LoopNest::getLoopStmtsFor(Tensor* t) const { +std::vector LoopNest::getLoopStmtsFor(Tensor t) const { StmtPtr cur_stmt = getLoopBodyFor(t); return getLoopStmtsFor(cur_stmt); } @@ -2407,8 +2407,8 @@ std::vector LoopNest::getLoopStmtsFor(StmtPtr s) const { return result; } -StmtPtr LoopNest::getLoopBodyFor(Tensor* t) const { - return getLoopBodyFor(t->buf()); +StmtPtr LoopNest::getLoopBodyFor(Tensor t) const { + return getLoopBodyFor(t.buf()); } StmtPtr LoopNest::getLoopBodyFor(BufPtr buf) const { diff --git a/torch/csrc/jit/tensorexpr/loopnest.h b/torch/csrc/jit/tensorexpr/loopnest.h index c8cf2d8..42f072d 100644 --- a/torch/csrc/jit/tensorexpr/loopnest.h +++ b/torch/csrc/jit/tensorexpr/loopnest.h @@ -27,11 +27,11 @@ class TORCH_API LoopNest { public: // A constructor for building a LoopNest from a list of Tensors LoopNest( - const std::vector& output_tensors, - const std::vector& tensors_to_compute); + const std::vector& output_tensors, + const std::vector& tensors_to_compute); // A convenience constructor for the case when all tensors are output tensors - LoopNest(const std::vector& output_tensors); + LoopNest(const std::vector& output_tensors); // A constructor for building a LoopNest from an Stmt and a list of output // buffers. @@ -45,10 +45,10 @@ class TORCH_API LoopNest { return root_stmt_; } - std::vector getLoopStmtsFor(Tensor*) const; + std::vector getLoopStmtsFor(Tensor) const; std::vector getLoopStmtsFor(BufPtr) const; std::vector getLoopStmtsFor(StmtPtr) const; - StmtPtr getLoopBodyFor(Tensor*) const; + StmtPtr getLoopBodyFor(Tensor) const; StmtPtr getLoopBodyFor(BufPtr) const; // Returns the For stmt indexed by 'indices' in the 'root' For stmt. 
@@ -547,8 +547,8 @@ class TORCH_API LoopNest { private: void initialize( - const std::vector& output_tensors, - const std::vector& tensors_to_compute); + const std::vector& output_tensors, + const std::vector& tensors_to_compute); StmtPtr insertAllocFree(StmtPtr stmt); const std::unordered_set getIntermediateBufs() const; diff --git a/torch/csrc/jit/tensorexpr/operators/conv2d.cpp b/torch/csrc/jit/tensorexpr/operators/conv2d.cpp index c4af83a..51d323f 100644 --- a/torch/csrc/jit/tensorexpr/operators/conv2d.cpp +++ b/torch/csrc/jit/tensorexpr/operators/conv2d.cpp @@ -16,7 +16,7 @@ void assert_dims_constant(const BufHandle& buf) { using InitFunc = std::function&)>; -Tensor* conv2d_depthwise_static( +Tensor conv2d_depthwise_static( BufHandle input, BufHandle weight, const InitFunc& init_func, @@ -45,7 +45,7 @@ Tensor* conv2d_depthwise_static( auto OH = (H - R + 2 * pad) / stride + 1; auto OW = (W - S + 2 * pad) / stride + 1; - Tensor* conv = Reduce( + Tensor conv = Reduce( "conv2d_depthwise", {{N, "n"}, {K, "k"}, {OH, "oh"}, {OW, "ow"}}, Sum(), @@ -83,7 +83,7 @@ Tensor* conv2d_depthwise_static( } else if (R == 3 && stride == 1 && pad == 1) { // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ForPtr main, peeled; - auto loops = nest.getAllLoopNestsWritingToBuf(conv->buf()); + auto loops = nest.getAllLoopNestsWritingToBuf(conv.buf()); main = loops[1][kLoopW]; nest.sliceHead(main, 1, &peeled, &main); nest.sliceTail(main, 1, &main, &peeled); @@ -92,10 +92,10 @@ Tensor* conv2d_depthwise_static( nest.sliceTail(main, 1, &main, &peeled); } - return new Tensor(conv->buf(), nest.root_stmt()); + return Tensor(conv.buf(), nest.root_stmt()); } -Tensor* conv2d_depthwise_dynamic( +Tensor conv2d_depthwise_dynamic( BufHandle input, BufHandle weight, const InitFunc& init_func, @@ -144,7 +144,7 @@ Tensor* conv2d_depthwise_dynamic( } // namespace -Tensor* conv2d_depthwise( +Tensor conv2d_depthwise( BufHandle input, BufHandle weight, BufHandle bias, @@ -158,7 +158,7 @@ Tensor* conv2d_depthwise( return conv2d_depthwise_static(input, weight, init_func, stride, pad, groups); } -Tensor* conv2d_depthwise( +Tensor conv2d_depthwise( BufHandle input, BufHandle weight, int stride, @@ -170,7 +170,7 @@ Tensor* conv2d_depthwise( return conv2d_depthwise_static(input, weight, init_func, stride, pad, groups); } -Tensor* conv2d_depthwise( +Tensor conv2d_depthwise( BufHandle input, BufHandle weight, BufHandle bias, @@ -206,7 +206,7 @@ Tensor* conv2d_depthwise( groups); } -Tensor* conv2d_depthwise( +Tensor conv2d_depthwise( BufHandle input, BufHandle weight, ExprHandle N, diff --git a/torch/csrc/jit/tensorexpr/operators/conv2d.h b/torch/csrc/jit/tensorexpr/operators/conv2d.h index 14612fb..4c2215b 100644 --- a/torch/csrc/jit/tensorexpr/operators/conv2d.h +++ b/torch/csrc/jit/tensorexpr/operators/conv2d.h @@ -7,7 +7,7 @@ namespace jit { namespace tensorexpr { // An API to compute 2D depthwise convolutions with bias. -TORCH_API Tensor* conv2d_depthwise( +TORCH_API Tensor conv2d_depthwise( BufHandle input, BufHandle weight, BufHandle bias, @@ -16,14 +16,14 @@ TORCH_API Tensor* conv2d_depthwise( int groups); // An API to compute 2D depthwise convolutions without bias. 
-TORCH_API Tensor* conv2d_depthwise( +TORCH_API Tensor conv2d_depthwise( BufHandle input, BufHandle weight, int stride, int pad, int groups); -TORCH_API Tensor* conv2d_depthwise( +TORCH_API Tensor conv2d_depthwise( BufHandle input, BufHandle weight, BufHandle bias, @@ -39,7 +39,7 @@ TORCH_API Tensor* conv2d_depthwise( ExprHandle pad, ExprHandle groups); -TORCH_API Tensor* conv2d_depthwise( +TORCH_API Tensor conv2d_depthwise( BufHandle input, BufHandle weight, ExprHandle N, diff --git a/torch/csrc/jit/tensorexpr/operators/matmul.cpp b/torch/csrc/jit/tensorexpr/operators/matmul.cpp index 23cb455..581514c 100644 --- a/torch/csrc/jit/tensorexpr/operators/matmul.cpp +++ b/torch/csrc/jit/tensorexpr/operators/matmul.cpp @@ -5,7 +5,7 @@ namespace torch { namespace jit { namespace tensorexpr { -Tensor* computeMatmul( +Tensor computeMatmul( const std::vector& inputs, const std::vector& outputShape, const c10::optional& outputType) { @@ -44,13 +44,13 @@ Tensor* computeMatmul( }, {{size_a[1], "K"}}); } else { - return new Tensor( + return Tensor( ResultBuf.node(), ExternalCall::make(ResultBuf, "nnc_aten_matmul", {a, b}, {})); } } -Tensor* computeAddMM( +Tensor computeAddMM( const std::vector& inputs, const std::vector& outputShape, const c10::optional& outputType) { @@ -59,7 +59,7 @@ Tensor* computeAddMM( dtype = Dtype(*outputType); } BufHandle ResultBuf("addmm", outputShape, dtype); - return new Tensor( + return Tensor( ResultBuf.node(), ExternalCall::make( ResultBuf, diff --git a/torch/csrc/jit/tensorexpr/operators/matmul.h b/torch/csrc/jit/tensorexpr/operators/matmul.h index 35b30f4..0b52ad6 100644 --- a/torch/csrc/jit/tensorexpr/operators/matmul.h +++ b/torch/csrc/jit/tensorexpr/operators/matmul.h @@ -6,11 +6,11 @@ namespace torch { namespace jit { namespace tensorexpr { -Tensor* computeMatmul( +Tensor computeMatmul( const std::vector& inputs, const std::vector& outputShape, const c10::optional& outputType); -Tensor* computeAddMM( +Tensor computeAddMM( const std::vector& inputs, const std::vector& outputShape, const c10::optional& outputType); diff --git a/torch/csrc/jit/tensorexpr/operators/norm.cpp b/torch/csrc/jit/tensorexpr/operators/norm.cpp index d96ebcd..610f928 100644 --- a/torch/csrc/jit/tensorexpr/operators/norm.cpp +++ b/torch/csrc/jit/tensorexpr/operators/norm.cpp @@ -4,7 +4,7 @@ namespace torch { namespace jit { namespace tensorexpr { -Tensor* computeBatchNorm( +Tensor computeBatchNorm( const std::vector& inputs, const std::vector& outputShape, const c10::optional& outputType) { diff --git a/torch/csrc/jit/tensorexpr/operators/norm.h b/torch/csrc/jit/tensorexpr/operators/norm.h index 98d53b4..7f1412f 100644 --- a/torch/csrc/jit/tensorexpr/operators/norm.h +++ b/torch/csrc/jit/tensorexpr/operators/norm.h @@ -6,7 +6,7 @@ namespace torch { namespace jit { namespace tensorexpr { -Tensor* computeBatchNorm( +Tensor computeBatchNorm( const std::vector& inputs, const std::vector& outputShape, const c10::optional& outputType); diff --git a/torch/csrc/jit/tensorexpr/operators/reduction.cpp b/torch/csrc/jit/tensorexpr/operators/reduction.cpp index c1f3f7f..fe5cb6d 100644 --- a/torch/csrc/jit/tensorexpr/operators/reduction.cpp +++ b/torch/csrc/jit/tensorexpr/operators/reduction.cpp @@ -19,7 +19,7 @@ namespace torch { namespace jit { namespace tensorexpr { -Tensor* computeSum( +Tensor computeSum( const std::vector& inputs, const c10::optional& outputType) { std::vector axes; @@ -100,7 +100,7 @@ Tensor* computeSum( reductionDims); } -Tensor* computeMean( +Tensor computeMean( const std::vector& 
inputs, const std::vector& outputShape, const c10::optional& outputType) { @@ -120,13 +120,13 @@ Tensor* computeMean( mean_dims_expr.emplace_back(idx); } } - return new Tensor( + return Tensor( ResultBuf.node(), ExternalCall::make( ResultBuf, "nnc_aten_mean", {InputBuf}, mean_dims_expr)); } -Tensor* computeAdaptiveAvgPool2d( +Tensor computeAdaptiveAvgPool2d( const std::vector& inputs, const std::vector& outputShape, const c10::optional& outputType) { @@ -137,7 +137,7 @@ Tensor* computeAdaptiveAvgPool2d( BufHandle ResultBuf("adaptive_avgpool2d", outputShape, dtype); // NOLINTNEXTLINE(performance-unnecessary-copy-initialization) auto out_size_param = c10::get(inputs[1]); - return new Tensor( + return Tensor( ResultBuf.node(), ExternalCall::make( ResultBuf, diff --git a/torch/csrc/jit/tensorexpr/operators/reduction.h b/torch/csrc/jit/tensorexpr/operators/reduction.h index 4335d7b..d76bac6 100644 --- a/torch/csrc/jit/tensorexpr/operators/reduction.h +++ b/torch/csrc/jit/tensorexpr/operators/reduction.h @@ -6,14 +6,14 @@ namespace torch { namespace jit { namespace tensorexpr { -TORCH_API Tensor* computeSum( +TORCH_API Tensor computeSum( const std::vector& inputs, const c10::optional& outputType); -TORCH_API Tensor* computeMean( +TORCH_API Tensor computeMean( const std::vector& inputs, const std::vector& outputShape, const c10::optional& outputType); -TORCH_API Tensor* computeAdaptiveAvgPool2d( +TORCH_API Tensor computeAdaptiveAvgPool2d( const std::vector& inputs, const std::vector& outputShape, const c10::optional& outputType); diff --git a/torch/csrc/jit/tensorexpr/operators/softmax.cpp b/torch/csrc/jit/tensorexpr/operators/softmax.cpp index d6cb6c0..c1c2872 100644 --- a/torch/csrc/jit/tensorexpr/operators/softmax.cpp +++ b/torch/csrc/jit/tensorexpr/operators/softmax.cpp @@ -6,7 +6,7 @@ namespace tensorexpr { using namespace torch::jit::tensorexpr; -Tensor* computeSoftmax( +Tensor computeSoftmax( const std::vector& inputs, const std::vector& outputShape, bool log_softmax) { @@ -111,48 +111,43 @@ Tensor* computeSoftmax( Compute("aten_softmax_exp", output_dims, [&](ParameterList& indices) { auto inp = tensorOrConstant( inputs[0], convert_indices_to_expr_handle(indices)); - return exp(inp - max->load(remove_softmax_dim_index(indices))); + return exp(inp - max.load(remove_softmax_dim_index(indices))); }); auto sum = Reduce( "aten_softmax_sum", non_softmax_dims, Sum(), [&](ParameterList& indices) { - return e->load(move_softmax_dim_index_to_pos(indices)); + return e.load(move_softmax_dim_index_to_pos(indices)); }, {output_dims[softmax_dim]}); if (!log_softmax) { auto result = Compute("aten_softmax", output_dims, [&](ParameterList& indices) { - return e->load(indices) / - sum->load(remove_softmax_dim_index(indices)); + return e.load(indices) / sum.load(remove_softmax_dim_index(indices)); }); - return new Tensor( - result->buf(), + return Tensor( + result.buf(), alloc(std::vector( - {max->stmt(), e->stmt(), sum->stmt(), result->stmt()}))); + {max.stmt(), e.stmt(), sum.stmt(), result.stmt()}))); } auto log_sum = Compute( "aten_softmax_log_sum", non_softmax_dims, [&](ParameterList& indices) { - return log(sum->load(indices)); + return log(sum.load(indices)); }); auto result = Compute("aten_log_softmax", output_dims, [&](ParameterList& indices) { auto inp = tensorOrConstant( inputs[0], convert_indices_to_expr_handle(indices)); auto non_softmax_indices = remove_softmax_dim_index(indices); - return inp - max->load(non_softmax_indices) - - log_sum->load(non_softmax_indices); + return inp - 
max.load(non_softmax_indices) - + log_sum.load(non_softmax_indices); }); - return new Tensor( - result->buf(), + return Tensor( + result.buf(), alloc(std::vector( - {max->stmt(), - e->stmt(), - sum->stmt(), - log_sum->stmt(), - result->stmt()}))); + {max.stmt(), e.stmt(), sum.stmt(), log_sum.stmt(), result.stmt()}))); } } // namespace tensorexpr diff --git a/torch/csrc/jit/tensorexpr/operators/softmax.h b/torch/csrc/jit/tensorexpr/operators/softmax.h index 07ddd0f..b74a867 100644 --- a/torch/csrc/jit/tensorexpr/operators/softmax.h +++ b/torch/csrc/jit/tensorexpr/operators/softmax.h @@ -6,7 +6,7 @@ namespace torch { namespace jit { namespace tensorexpr { -Tensor* computeSoftmax( +Tensor computeSoftmax( const std::vector& inputs, const std::vector& outputShape, bool log_softmax); diff --git a/torch/csrc/jit/tensorexpr/tensor.cpp b/torch/csrc/jit/tensorexpr/tensor.cpp index 9df70f8..ea3902d 100644 --- a/torch/csrc/jit/tensorexpr/tensor.cpp +++ b/torch/csrc/jit/tensorexpr/tensor.cpp @@ -48,7 +48,7 @@ StmtPtr Tensor::constructStmt( return s; } -Tensor* Compute( +Tensor Compute( const std::string& name, const std::vector& dim_args, const std::function&)>& body_func) { @@ -57,10 +57,10 @@ Tensor* Compute( unpack_dim_args(dim_args, &dims, &args); ExprPtr body = body_func(VarVectorToVarHandleVector(args)).node(); BufPtr buf = alloc(name, dims, body->dtype()); - return new Tensor(buf, args, body); + return Tensor(buf, args, body); } -Tensor* Compute( +Tensor Compute( const std::string& name, const std::vector& dim_args, const std::function& body_func) { @@ -73,10 +73,10 @@ Tensor* Compute( unpack_dim_args(dim_args, &dims, &args); ExprPtr body = body_func(VarHandle(args[0])).node(); BufPtr buf = alloc(name, dims, body->dtype()); - return new Tensor(buf, args, body); + return Tensor(buf, args, body); } -Tensor* Compute( +Tensor Compute( const std::string& name, const std::vector& dim_args, const std::function& @@ -89,10 +89,10 @@ Tensor* Compute( unpack_dim_args(dim_args, &dims, &args); ExprPtr body = body_func(VarHandle(args[0]), VarHandle(args[1])).node(); BufPtr buf = alloc(name, dims, body->dtype()); - return new Tensor(buf, args, body); + return Tensor(buf, args, body); } -Tensor* Compute( +Tensor Compute( const std::string& name, const std::vector& dim_args, const std::function< @@ -108,10 +108,10 @@ Tensor* Compute( body_func(VarHandle(args[0]), VarHandle(args[1]), VarHandle(args[2])) .node(); BufPtr buf = alloc(name, dims, body->dtype()); - return new Tensor(buf, args, body); + return Tensor(buf, args, body); } -Tensor* Compute( +Tensor Compute( const std::string& name, const std::vector& dim_args, const std::function(name, dims, body->dtype()); - return new Tensor(buf, args, body); + return Tensor(buf, args, body); } -Tensor* Reduce( +Tensor Reduce( const std::string& name, const std::vector& dim_args, const Reducer& reducer, @@ -149,7 +149,7 @@ Tensor* Reduce( reduce_args); } -Tensor* Reduce( +Tensor Reduce( const std::string& name, const std::vector& dim_args, const Reducer& reducer, @@ -163,17 +163,17 @@ Tensor* Reduce( reduce_args); } -Tensor* Reduce( +Tensor Reduce( const std::string& name, const std::vector& dim_args, const Reducer& reducer, - Tensor* tensor, + Tensor tensor, const std::vector& reduce_args) { return Reduce( name, dim_args, reducer, - [&](ParameterList& p) { return tensor->load(p); }, + [&](ParameterList& p) { return tensor.load(p); }, reduce_args); } diff --git a/torch/csrc/jit/tensorexpr/tensor.h b/torch/csrc/jit/tensorexpr/tensor.h index 3eb02c6..8d8ffe5 100644 --- 
a/torch/csrc/jit/tensorexpr/tensor.h +++ b/torch/csrc/jit/tensorexpr/tensor.h @@ -12,7 +12,7 @@ namespace torch { namespace jit { namespace tensorexpr { -class TORCH_API Tensor : KernelScopedObject { +class TORCH_API Tensor { public: // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) Tensor(BufPtr buf, const std::vector& args, ExprPtr body) @@ -42,9 +42,9 @@ class TORCH_API Tensor : KernelScopedObject { } template - inline ExprHandle load(const std::vector& args); + inline ExprHandle load(const std::vector& args) const; template - inline ExprHandle load(const Ts&... ts); + inline ExprHandle load(const Ts&... ts) const; private: StmtPtr constructStmt( @@ -134,22 +134,22 @@ class Placeholder { std::vector strides_; }; -TORCH_API Tensor* Compute( +TORCH_API Tensor Compute( const std::string& func_name, const std::vector& dim_args, const std::function& body_func); -TORCH_API Tensor* Compute( +TORCH_API Tensor Compute( const std::string& func_name, const std::vector& dim_args, const std::function& body_func); -TORCH_API Tensor* Compute( +TORCH_API Tensor Compute( const std::string& func_name, const std::vector& dim_args, const std::function< ExprHandle(const VarHandle&, const VarHandle&, const VarHandle&)>& body_func); -TORCH_API Tensor* Compute( +TORCH_API Tensor Compute( const std::string& func_name, const std::vector& dim_args, const std::function& body_func); -TORCH_API Tensor* Compute( +TORCH_API Tensor Compute( const std::string& func_name, const std::vector& dim_args, const std::function&)>& body_func); @@ -179,7 +179,7 @@ inline void unpack_dim_args( // Handle reductions over a Reducer and a body_func which produces values. template -Tensor* Reduce( +Tensor Reduce( const std::string& func_name, const std::vector& dim_args, const Reducer& reducer, @@ -207,7 +207,7 @@ Tensor* Reduce( .node(); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) BufPtr func_result = alloc(func_name, dims, body->dtype()); - return new Tensor(func_result, vars, body); + return Tensor(func_result, vars, body); } // NOLINTNEXTLINE(cppcoreguidelines-init-variables) @@ -227,13 +227,12 @@ Tensor* Reduce( // NOLINTNEXTLINE(cppcoreguidelines-init-variables) ReduceOpPtr reduce_op = reducer(func_result, body, output_args, reduce_vars); // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - Tensor* t = - new Tensor(func_result, vars, reduce_dims, reduce_vars, reduce_op); + Tensor t = Tensor(func_result, vars, reduce_dims, reduce_vars, reduce_op); return t; } template -Tensor* Reduce( +Tensor Reduce( const std::string& func_name, const std::vector& dim_args, const Reducer& reducer, @@ -250,7 +249,7 @@ Tensor* Reduce( // Overload which allows inline lambda functions for the body_func. template -Tensor* Reduce( +Tensor Reduce( const std::string& func_name, const std::vector& dim_args, const Reducer& reducer, @@ -260,14 +259,14 @@ Tensor* Reduce( } // Overload for the common case of all dimensions of a Placeholder. -TORCH_API Tensor* Reduce( +TORCH_API Tensor Reduce( const std::string& func_name, const std::vector& dim_args, const Reducer& reducer, const Placeholder& buffer, const std::vector& reduce_args); -TORCH_API Tensor* Reduce( +TORCH_API Tensor Reduce( const std::string& name, const std::vector& dim_args, const Reducer& reducer, @@ -276,22 +275,22 @@ TORCH_API Tensor* Reduce( // Overload for the common case of all dimensions of a prevously Computed // Tensor. 
-TORCH_API Tensor* Reduce( +TORCH_API Tensor Reduce( const std::string& func_name, const std::vector& dim_args, const Reducer& reducer, - Tensor* tensor, + Tensor tensor, const std::vector& reduce_args); template -inline ExprHandle Tensor::load(const Ts&... ts) { +inline ExprHandle Tensor::load(const Ts&... ts) const { // NOLINTNEXTLINE(cppcoreguidelines-init-variables) std::vector params({ExprHandle(ts)...}); return Load::make(BufHandle(this->buf()), params); } template -inline ExprHandle Tensor::load(const std::vector& args) { +inline ExprHandle Tensor::load(const std::vector& args) const { // NOLINTNEXTLINE(cppcoreguidelines-init-variables) std::vector params(args.begin(), args.end()); return Load::make(BufHandle(this->buf()), params); diff --git a/torch/csrc/jit/tensorexpr/tensorexpr_init.cpp b/torch/csrc/jit/tensorexpr/tensorexpr_init.cpp index 4e1618a..c380233 100644 --- a/torch/csrc/jit/tensorexpr/tensorexpr_init.cpp +++ b/torch/csrc/jit/tensorexpr/tensorexpr_init.cpp @@ -185,9 +185,9 @@ void initTensorExprBindings(PyObject* module) { const std::vector& args, const ExprHandle& val) { return self.store(args, val); }) .def("data", [](Placeholder& self) { return BufHandle(self.data()); }); - py::class_>(te, "Tensor") - .def(py::init( - [](BufHandle& b, StmtPtr s) { return new Tensor(b.node(), s); })) + py::class_(te, "Tensor") + .def( + py::init([](BufHandle& b, StmtPtr s) { return Tensor(b.node(), s); })) .def( "load", [](Tensor& self, const std::vector& v) { @@ -268,7 +268,7 @@ void initTensorExprBindings(PyObject* module) { [](const std::string& func_name, const std::vector& dim_args, const Reducer& reducer, - Tensor* buffer, + Tensor buffer, const std::vector& reduce_args) { return Reduce(func_name, dim_args, reducer, buffer, reduce_args); }, @@ -380,7 +380,7 @@ void initTensorExprBindings(PyObject* module) { .def(py::init(&ExternalCall::make)); py::class_(te, "LoopNest") - .def(py::init&>()) + .def(py::init&>()) .def(py::init([](StmtPtr s, const std::vector& bufs) { std::unordered_set buf_nodes; for (auto& buf : bufs) { @@ -392,9 +392,7 @@ void initTensorExprBindings(PyObject* module) { .def("prepare_for_codegen", &LoopNest::prepareForCodegen) .def( "get_loop_body_for", - [](const LoopNest& self, Tensor* t) { - return self.getLoopBodyFor(t); - }, + [](const LoopNest& self, Tensor t) { return self.getLoopBodyFor(t); }, py::return_value_policy::reference) .def( "get_loop_body_for", @@ -404,7 +402,7 @@ void initTensorExprBindings(PyObject* module) { py::return_value_policy::reference) .def( "get_loops_for", - [](const LoopNest& self, Tensor* t) { + [](const LoopNest& self, Tensor t) { return self.getLoopStmtsFor(t); }, py::return_value_policy::reference) @@ -760,12 +758,12 @@ void initTensorExprBindings(PyObject* module) { py::class_(te, "BufferArg") .def(py::init()) - .def(py::init()) + .def(py::init()) .def(py::init()) .def(py::init()); py::implicitly_convertible(); - py::implicitly_convertible(); + py::implicitly_convertible(); py::implicitly_convertible(); py::implicitly_convertible(); -- 2.7.4
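For reference, a minimal call-site sketch of the API after this change, assuming the includes, the function name, the dimension sizes, and the particular ops shown here (they are illustrative, not taken from the patch); the patterns themselves — Compute/Reduce returning Tensor by value, LoopNest built from std::vector<Tensor>, member access via '.' instead of '->', and no KernelScope/KernelArena — mirror the hunks above.

#include <torch/csrc/jit/tensorexpr/loopnest.h>
#include <torch/csrc/jit/tensorexpr/tensor.h>

using namespace torch::jit::tensorexpr;

void valueTypeTensorExample() {
  // No KernelScope/KernelArena is needed after this change.
  Placeholder A("A", kFloat, {64});

  // Compute returns Tensor by value (previously Tensor*).
  Tensor B = Compute("B", {{64, "i"}}, [&](const VarHandle& i) {
    return A.load(i) + FloatImm::make(1.0f);
  });

  // The Reduce overload taking a previously computed Tensor also takes and
  // returns Tensor by value now.
  Tensor S = Reduce("sum", {}, Sum(), B, {{64, "i"}});

  // LoopNest is constructed from std::vector<Tensor> rather than
  // std::vector<Tensor*>, and Tensor members are accessed with '.'.
  LoopNest nest({B, S});
  auto loops = nest.getLoopStmtsFor(B);
  (void)loops;
  (void)S.buf();
  nest.prepareForCodegen();
}

The same value semantics carry over to the operator helpers touched above (computeSum, computeMean, computeMatmul, computeSoftmax, conv2d_depthwise, and so on), which now return Tensor directly instead of a heap-allocated Tensor*.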