#include <arm_compute/runtime/CL/functions/CLActivationLayer.h>
#include <arm_compute/runtime/CL/functions/CLActivationLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLScale.h>
+#include <arm_compute/runtime/CL/functions/CLSpaceToBatchND.h>
#include <arm_compute/runtime/CL/functions/CLSpaceToDepth.h>
#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h>
#include <arm_compute/runtime/CL/functions/CLStridedSlice.h>
auto block_size_alloc = ctx.at(::internal::tflite::operand::Index{param.block_size_index});
auto padding_size_alloc = ctx.at(::internal::tflite::operand::Index{param.padding_size_index});
- auto fn = nnfw::make_unique<SimpleSpaceToBatchND>();
+ // NOTE SimpleSpaceToBatchND is quite slow
+ if (from_env<bool>(std::getenv("USE_SIMPLE_SPACE_TO_BATCH_ND")))
+ {
+ auto fn = nnfw::make_unique<SimpleSpaceToBatchND>();
- fn->configure(input_alloc, block_size_alloc, padding_size_alloc, output_alloc,
- getARMComputeAxises(param.rank));
- builder.append("SpaceToBatchND", std::move(fn));
+ fn->configure(input_alloc, block_size_alloc, padding_size_alloc, output_alloc,
+ getARMComputeAxises(param.rank));
+ builder.append("SpaceToBatchND", std::move(fn));
+ }
+ else if (::internal::arm_compute::isGpuMode())
+ {
+ auto fn = nnfw::make_unique<::arm_compute::CLSpaceToBatchND>();
+ fn->configure(CAST_CL(input_alloc), CAST_CL(block_size_alloc), CAST_CL(padding_size_alloc),
+ CAST_CL(output_alloc));
+ builder.append("SpaceToBatchND", std::move(fn));
+ }
+ else
+ {
+ // TODO Enable NEON Support
+ throw std::runtime_error("Not supported, yet");
+ }
};
_builder.addStage(stage);