{
  uint32_t axis_offset = 0;
-  auto fn = [&](::neurun::backend::operand::ITensor &out_tensor,
-                ::neurun::backend::operand::ITensor &in_tensor) {
+  auto output_fn = [&](::neurun::backend::operand::ITensor &out_tensor) {
+    for (auto input : _input_allocs)
    {
      auto &out_cl_tensor =
          static_cast<::neurun::backend::acl_cl::operand::ICLTensor &>(out_tensor);
-      {
+      auto input_fn = [&](::neurun::backend::operand::ITensor &in_tensor) {
        auto &in_cl_tensor =
            static_cast<::neurun::backend::acl_cl::operand::ICLTensor &>(in_tensor);
        for (uint32_t i = 0; i < in_cl_tensor.info()->dimension(0); i++)
        {
          // ... (element copy from in_cl_tensor into out_cl_tensor elided) ...
        }
        if (_axis == 2)
          axis_offset += in_cl_tensor.info()->dimension(2);
        if (_axis == 3)
          axis_offset += in_cl_tensor.info()->dimension(3);
-      }
+      };
+      input->access(input_fn);
    }
  };
-
-  for (auto input : _input_allocs)
-  {
-    _output_alloc->access(fn, *input);
-  }
+  _output_alloc->access(output_fn);
}
VERBOSE(Concat_RUN) << "End Concat" << std::endl;
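The hunk above replaces the two-tensor access with nested single-tensor accesses: the output stays mapped for the whole concatenation, while each input is mapped only around its own copy. Below is a standalone sketch of that call pattern; the `Object` and `ITensor` types are simplified stand-ins for the neurun classes, not the real API.

    #include <functional>
    #include <vector>

    struct ITensor
    {
      // stand-in for neurun's tensor interface
    };

    struct Object
    {
      ITensor tensor;
      void access(const std::function<void(ITensor &)> &fn)
      {
        // map(queue) would happen here in the real backend
        fn(tensor);
        // unmap(queue) would happen here
      }
    };

    int main()
    {
      Object output;
      std::vector<Object> inputs(3);

      // One access for the output; one nested access per input.
      output.access([&](ITensor &out) {
        for (auto &input : inputs)
        {
          input.access([&](ITensor &in) {
            // copy `in` into `out` at the current axis offset
            (void)in;
            (void)out;
          });
        }
      });
      return 0;
    }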
namespace acl_cl
{
namespace operand
{
-std::mutex Object::_mu{};
+
void Object::access(const std::function<void(backend::operand::ITensor &tensor)> &fn) const
{
-  // This is an optional input
-  if (_tensor->total_size() == 0)
-    return;
-
  auto &queue = ::arm_compute::CLScheduler::get().queue();
-  // Calling access for more than one tensor causing an error in map(). May be sum of memory
-  // requests are too large.
-  // TODO: Need to investigate. Ideally it must have a non-static mutex and
-  //       counter and in case of parallel call of THE SAME OBJECT,
-  //       call the map() for the first one and unmap() for the last one
-  std::lock_guard<std::mutex> lock{_mu};
-  _tensor->map(queue);
-
-  fn(*_tensor);
-  _tensor->unmap(queue);
-}
-
-void Object::access(const std::function<void(backend::operand::ITensor &tensor,
-                                             backend::operand::ITensor &tensor_other)> &fn,
-                    backend::acl_cl::operand::Object &tensor_other) const
-{
  // This is an optional input
-  if (_tensor->total_size() == 0 || tensor_other.ptr()->total_size() == 0)
+  if (_tensor->total_size() == 0)
    return;
-  auto &queue = ::arm_compute::CLScheduler::get().queue();
-  // Calling access for more than one tensor causing an error in map(). May be sum of memory
-  // requests are too large.
-  // TODO: Need to investigate. Ideally it must have a non-static mutex and
-  //       counter and in case of parallel call of THE SAME OBJECT,
-  //       call the map() for the first one and unmap() for the last one
-  std::lock_guard<std::mutex> lock{_mu};
  _tensor->map(queue);
-  tensor_other.ptr()->map(queue);
-
-  fn(*_tensor, *tensor_other.ptr());
-
-  tensor_other.ptr()->unmap(queue);
+  fn(*_tensor);
  _tensor->unmap(queue);
}
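For readability, this is Object::access as it reads once the hunk above is applied (simply the patched body, reconstructed):

    void Object::access(const std::function<void(backend::operand::ITensor &tensor)> &fn) const
    {
      auto &queue = ::arm_compute::CLScheduler::get().queue();
      // This is an optional input
      if (_tensor->total_size() == 0)
        return;
      _tensor->map(queue);
      fn(*_tensor);
      _tensor->unmap(queue);
    }

With the two-tensor overload and the static _mu lock gone, each access() maps and unmaps exactly one tensor; callers that need several tensors mapped at once compose nested access() calls, as in the Concat change above.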
#ifndef __NEURUN_BACKEND_ACL_CL_OPERAND_OBJECT_H__
#define __NEURUN_BACKEND_ACL_CL_OPERAND_OBJECT_H__
#include <memory>
-#include <mutex>
#include <backend/operand/IObject.h>
#include "operand/ICLTensor.h"
private:
  std::shared_ptr<acl_cl::operand::ICLTensor> _tensor;
-  static std::mutex _mu;
public:
  void access(const std::function<void(backend::operand::ITensor &tensor)> &fn) const override;
-  void access(const std::function<void(backend::operand::ITensor &tensor,
-                                       backend::operand::ITensor &tensor_other)> &fn,
-              backend::acl_cl::operand::Object &tensor_other) const;
};
} // namespace operand
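With only the single-tensor access() left on Object, host-side reads and writes go through the callback. A hedged usage sketch follows; the clear() helper is hypothetical, and it assumes ITensor exposes buffer() and total_size() (neither is shown in this patch).

    #include <cstring>

    // Hypothetical helper: zero-fill an operand through the remaining overload.
    void clear(const neurun::backend::acl_cl::operand::Object &obj)
    {
      obj.access([](neurun::backend::operand::ITensor &tensor) {
        // The tensor is mapped only for the duration of this callback.
        std::memset(tensor.buffer(), 0, tensor.total_size());
      });
    }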