virtual IWorkloadFactoryPtr CreateWorkloadFactory(
class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const;
+ virtual IWorkloadFactoryPtr CreateWorkloadFactory(
+ const IMemoryManagerSharedPtr& memoryManager,
+ const ModelOptions& modelOptions) const;
+
+ virtual IWorkloadFactoryPtr CreateWorkloadFactory(
+ class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+ const ModelOptions& modelOptions) const;
+
/// Create the runtime context of the backend
///
/// Implementations may return a default-constructed IBackendContextPtr if the backend does not need a context.
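For reference, ModelOptions is simply a vector of BackendOptions, so a caller wanting fast math on GpuAcc would build it along these lines (an illustrative sketch mirroring the test cases further down):

    armnn::ModelOptions modelOptions;
    modelOptions.push_back(armnn::BackendOptions("GpuAcc",
    {
        { "FastMathEnabled", true }
    }));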
if (backend->SupportsTensorAllocatorAPI())
{
- auto workloadFactory = backend->CreateWorkloadFactory(m_TensorHandleFactoryRegistry);
+ auto workloadFactory = backend->CreateWorkloadFactory(
+ m_TensorHandleFactoryRegistry, m_OptimizedNetwork->GetModelOptions());
m_WorkloadFactories.emplace(
std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
}
else
{
IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
- auto workloadFactory = backend->CreateWorkloadFactory(memoryManager);
+ auto workloadFactory = backend->CreateWorkloadFactory(
+ memoryManager, m_OptimizedNetwork->GetModelOptions());
m_WorkloadFactories.emplace(
std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");
std::string reasonIfUnsupported;
- ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported),
+ ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer,
+ {},
+ reasonIfUnsupported,
+ m_OptimizedNetwork->GetModelOptions()),
"Factory does not support layer");
IgnoreUnused(reasonIfUnsupported);
return *workloadFactory;
// Calls CreateWorkload for a layer, and checks the returned pointer is of the correct type.
template<typename Workload>
-std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer, const IWorkloadFactory& factory)
+std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer,
+ const IWorkloadFactory& factory,
+ const ModelOptions& modelOptions = {})
{
std::unique_ptr<IWorkload> workload = layer.CreateWorkload(factory);
BOOST_TEST(workload.get() == PolymorphicDowncast<Workload*>(workload.get()),
"Cannot convert to derived class");
std::string reasonIfUnsupported;
layer.SetBackendId(factory.GetBackendId());
- BOOST_TEST(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported));
+ BOOST_TEST(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported, modelOptions));
return std::unique_ptr<Workload>(static_cast<Workload*>(workload.release()));
}
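Because modelOptions defaults to an empty list, existing MakeAndCheckWorkload call sites compile unchanged; only tests that exercise backend options pass the extra argument, e.g. (sketch, assuming layer, factory and modelOptions are already set up as elsewhere in these tests):

    auto workload = MakeAndCheckWorkload<ClConvolution2dWorkload>(*layer, factory, modelOptions);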
template <typename Convolution2dWorkload, armnn::DataType DataType>
std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
armnn::Graph& graph,
- DataLayout dataLayout = DataLayout::NCHW)
+ DataLayout dataLayout = DataLayout::NCHW,
+ const ModelOptions& modelOptions = {})
{
// Creates the layer we're testing.
Convolution2dDescriptor layerDesc;
CreateTensorHandles(graph, factory);
// Makes the workload and checks it.
- auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory);
+ auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);
Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
BOOST_TEST(queueDescriptor.m_Parameters.m_StrideX == 2);
return IWorkloadFactoryPtr{};
}
+IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
+ const IMemoryManagerSharedPtr& memoryManager,
+ const ModelOptions& modelOptions) const
+{
+    if (modelOptions.empty())
+ {
+ return CreateWorkloadFactory(memoryManager);
+ }
+ return IWorkloadFactoryPtr{};
+}
+
+IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
+ class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+ const ModelOptions& modelOptions) const
+{
+    if (modelOptions.empty())
+ {
+ return CreateWorkloadFactory(tensorHandleFactoryRegistry);
+ }
+ return IWorkloadFactoryPtr{};
+}
+
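These defaults keep backward compatibility: with no options supplied they fall through to the pre-existing overloads, and the null factory return signals that a backend cannot honour non-empty options. A backend opting in forwards the options into a backend-specific model context; a sketch of the pattern, where CustomBackend, CustomWorkloadFactory and its two-argument constructor are hypothetical:

    IBackendInternal::IWorkloadFactoryPtr CustomBackend::CreateWorkloadFactory(
        const IMemoryManagerSharedPtr& memoryManager,
        const ModelOptions& modelOptions) const
    {
        // Same pattern as ClBackend/NeonBackend below: parse the options once
        // into a model context, then hand that context to the workload factory.
        return std::make_unique<CustomWorkloadFactory>(
            memoryManager, CreateBackendSpecificModelContext(modelOptions));
    }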
IBackendInternal::IBackendContextPtr IBackendInternal::CreateBackendContext(const IRuntime::CreationOptions&) const
{
return IBackendContextPtr{};
modelOptions);
}
+bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
+ const IConnectableLayer& connectableLayer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions)
+{
+ return IsLayerConfigurationSupported(backendId,
+ connectableLayer,
+ dataType,
+ outReasonIfUnsupported,
+ modelOptions);
+}
+
// Default Implementations
std::unique_ptr<IWorkload> IWorkloadFactory::CreateAbs(const AbsQueueDescriptor& /*descriptor*/,
const WorkloadInfo& /*info*/) const
std::string& outReasonIfUnsupported,
const ModelOptions& modelOptions);
+ static bool IsLayerSupported(const BackendId& backendId,
+ const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions);
+
virtual bool SupportsSubTensors() const = 0;
ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
}
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
+ const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
+{
+ return std::make_unique<ClWorkloadFactory>(
+ PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
+IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
TensorHandleFactoryRegistry& registry) const
{
auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
}
+IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
+ TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
+{
+ auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+
+ registry.RegisterMemoryManager(memoryManager);
+ registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
+
+ return std::make_unique<ClWorkloadFactory>(
+ PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
{
return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic()};
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
TensorHandleFactoryRegistry& registry) const override;
+    IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager,
+                                              const ModelOptions& modelOptions) const override;
+
+ IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+ const ModelOptions& modelOptions) const override;
+
std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override;
//
#include "ClWorkloadFactory.hpp"
#include "ClBackendId.hpp"
+#include "ClBackendModelContext.hpp"
#include <Layer.hpp>
return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
}
+bool ClWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions)
+{
+ return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
+}
+
const BackendId& ClWorkloadFactory::GetBackendId() const
{
return s_Id;
}
ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager)
- : m_MemoryManager(memoryManager)
+ : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
+{
+}
+
+ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager,
+ const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
+ : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
{
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
+    bool isFastMathEnabled = false;
+    if (m_ModelContextPtr)
+    {
+        // Pick up the FastMathEnabled option from the backend-specific model context, if one was set.
+        auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
+        if (modelOptions)
+        {
+            isFastMathEnabled = modelOptions->IsFastMathEnabled();
+        }
+    }
+ return MakeWorkload<ClConvolution2dWorkload>(descriptor,
+ info,
+ m_MemoryManager->GetIntraLayerManager(),
+ isFastMathEnabled);
}
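FastMathEnabled gives the Compute Library permission to select faster but potentially less accurate kernels (for example Winograd-based convolution); with the option absent or false, the workload is created exactly as before.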
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
#include <armnn/IRuntime.hpp>
#include <armnn/Optional.hpp>
+#include <armnn/backends/IBackendInternal.hpp>
+
#include <backendsCommon/WorkloadFactoryBase.hpp>
#include <aclCommon/BaseMemoryManager.hpp>
public:
ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager);
+ ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager,
+ const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr);
+
const BackendId& GetBackendId() const override;
static bool IsLayerSupported(const Layer& layer,
Optional<DataType> dataType,
std::string& outReasonIfUnsupported);
+ static bool IsLayerSupported(const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions);
+
bool SupportsSubTensors() const override { return true; }
ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
Args&&... args);
mutable std::shared_ptr<ClMemoryManager> m_MemoryManager;
+ const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr;
};
} // namespace armnn
#include "ClContextControlFixture.hpp"
#include "ClWorkloadFactoryHelper.hpp"
+#include <armnn/utility/Assert.hpp>
+#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload)
+{
+ Graph graph;
+
+ using ModelOptions = std::vector<BackendOptions>;
+ ModelOptions modelOptions = {};
+ BackendOptions gpuAcc("GpuAcc",
+ {
+ { "FastMathEnabled", true }
+ });
+ modelOptions.push_back(gpuAcc);
+
+ ClWorkloadFactory factory =
+ ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
+
+ auto workload =
+ CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(factory,
+ graph,
+ DataLayout::NCHW,
+ modelOptions);
+
+ ARMNN_ASSERT(workload != nullptr);
+ auto conv2dWorkload = PolymorphicDowncast<ClConvolution2dWorkload*>(workload.get());
+ IgnoreUnused(conv2dWorkload);
+ ARMNN_ASSERT(conv2dWorkload != nullptr);
+    // Fast math is enabled, but this convolution configuration does not qualify for WINOGRAD, so GEMM is expected.
+ ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::GEMM);
+}
+
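The GEMM expectation holds because Winograd convolution requires unit strides, while CreateConvolution2dWorkloadTest builds its layer with stride 2, so fast math has no eligible kernel to switch to. At application level the same option is supplied when optimizing the network; a hedged sketch, assuming OptimizerOptions exposes the model options as m_ModelOptions and that network and runtime already exist:

    armnn::OptimizerOptions optimizerOptions;
    optimizerOptions.m_ModelOptions.push_back(armnn::BackendOptions("GpuAcc",
    {
        { "FastMathEnabled", true }
    }));
    armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(
        *network, {"GpuAcc"}, runtime->GetDeviceSpec(), optimizerOptions);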
template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
{
}
static armnn::ClWorkloadFactory GetFactory(
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ModelOptions& modelOptions = {})
{
- return armnn::ClWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager));
+ armnn::ClBackend backend;
+ return armnn::ClWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager),
+ backend.CreateBackendSpecificModelContext(modelOptions));
}
static armnn::ClTensorHandleFactory GetTensorHandleFactory(
}
ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
- const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const bool isFastMathEnabled)
: BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
, m_ConvolutionLayer(memoryManager)
{
&output,
padStrideInfo,
arm_compute::WeightsInfo(),
- aclDilationInfo);
+ aclDilationInfo,
+ arm_compute::ActivationLayerInfo(),
+ isFastMathEnabled);
+
+ m_ConvolutionMethod =
+ m_ConvolutionLayer.get_convolution_method(input.info(),
+ m_KernelTensor->info(),
+ output.info(),
+ padStrideInfo,
+ arm_compute::WeightsInfo(),
+ arm_compute::ActivationLayerInfo(),
+ arm_compute::CLScheduler::get().target(),
+ aclDilationInfo,
+ isFastMathEnabled);
InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
RunClFunction(m_ConvolutionLayer, CHECK_LOCATION());
}
+arm_compute::ConvolutionMethod ClConvolution2dWorkload::GetConvolutionMethod() const
+{
+ return m_ConvolutionMethod;
+}
+
void ClConvolution2dWorkload::FreeUnusedTensors()
{
FreeTensorIfUnused(m_KernelTensor);
class ClConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescriptor>
{
public:
- ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const bool isFastMathEnabled = false);
void Execute() const override;
+ arm_compute::ConvolutionMethod GetConvolutionMethod() const;
+
private:
mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
+ arm_compute::ConvolutionMethod m_ConvolutionMethod;
+
void FreeUnusedTensors();
};
}
IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
+ const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
+{
+ return std::make_unique<NeonWorkloadFactory>(
+ PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
+IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
{
auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
}
+IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
+ TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const
+{
+ auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
+ BaseMemoryManager::MemoryAffinity::Offset);
+
+ tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
+ tensorHandleFactoryRegistry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));
+
+ return std::make_unique<NeonWorkloadFactory>(
+ PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
{
return IBackendContextPtr{};
IWorkloadFactoryPtr CreateWorkloadFactory(
class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const override;
+    IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager,
+                                              const ModelOptions& modelOptions) const override;
+
+ IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+ const ModelOptions& modelOptions) const override;
+
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(
const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override;
{
if (m_ModelContextPtr.get() != nullptr)
{
- auto modelOptions = armnn::PolymorphicDowncast<NeonBackendModelContext*>(m_ModelContextPtr.get());
+ auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
if (modelOptions)
{
isFastMathEnabled = modelOptions->IsFastMathEnabled();
//
#include "NeonBackendId.hpp"
+#include "NeonBackendModelContext.hpp"
#include "NeonTensorHandle.hpp"
#include "NeonWorkloadFactory.hpp"
return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
}
+bool NeonWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions)
+{
+ return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
+}
+
const BackendId& NeonWorkloadFactory::GetBackendId() const
{
return s_Id;
}
NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
- : m_MemoryManager(memoryManager)
+ : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
+{
+}
+
+NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
+ const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
+ : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
{
}
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
{
- return std::make_unique<NeonConvolution2dWorkload>(descriptor, info,
- m_MemoryManager->GetIntraLayerManager());
+    bool isFastMathEnabled = false;
+    if (m_ModelContextPtr)
+    {
+        // Pick up the FastMathEnabled option from the backend-specific model context, if one was set.
+        auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
+        if (modelOptions)
+        {
+            isFastMathEnabled = modelOptions->IsFastMathEnabled();
+        }
+    }
+ return std::make_unique<NeonConvolution2dWorkload>(descriptor,
+ info,
+ m_MemoryManager->GetIntraLayerManager(),
+ isFastMathEnabled);
}
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
#pragma once
#include <armnn/Optional.hpp>
+#include <armnn/backends/IBackendInternal.hpp>
#include <backendsCommon/WorkloadFactoryBase.hpp>
#include <aclCommon/BaseMemoryManager.hpp>
public:
NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager);
+ NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
+ const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr);
+
const BackendId& GetBackendId() const override;
static bool IsLayerSupported(const Layer& layer,
Optional<DataType> dataType,
std::string& outReasonIfUnsupported);
+ static bool IsLayerSupported(const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions);
+
bool SupportsSubTensors() const override { return true; }
ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
private:
mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager;
+ const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr;
};
} // namespace armnn
#include "NeonWorkloadFactoryHelper.hpp"
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/Assert.hpp>
+#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
}
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload)
+{
+ Graph graph;
+ using ModelOptions = std::vector<BackendOptions>;
+ ModelOptions modelOptions = {};
+ BackendOptions cpuAcc("CpuAcc",
+ {
+ { "FastMathEnabled", true }
+ });
+ modelOptions.push_back(cpuAcc);
+ NeonWorkloadFactory factory =
+ NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
+
+ auto workload =
+ CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload, armnn::DataType::Float32>(factory,
+ graph,
+ DataLayout::NCHW,
+ modelOptions);
+
+ ARMNN_ASSERT(workload != nullptr);
+ auto conv2dWorkload = PolymorphicDowncast<NeonConvolution2dWorkload*>(workload.get());
+ IgnoreUnused(conv2dWorkload);
+ ARMNN_ASSERT(conv2dWorkload != nullptr);
+    // Fast math is enabled, but this convolution configuration does not qualify for WINOGRAD, so GEMM is expected.
+ ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::GEMM);
+}
+
template <typename armnn::DataType DataType>
static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
{
}
static armnn::NeonWorkloadFactory GetFactory(
- const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ModelOptions& modelOptions = {})
{
- return armnn::NeonWorkloadFactory(
- armnn::PolymorphicPointerDowncast<armnn::NeonMemoryManager>(memoryManager));
+ armnn::NeonBackend backend;
+ return armnn::NeonWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::NeonMemoryManager>(memoryManager),
+ backend.CreateBackendSpecificModelContext(modelOptions));
}
static armnn::NeonTensorHandleFactory GetTensorHandleFactory(
}
NeonConvolution2dWorkload::NeonConvolution2dWorkload(
- const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const bool isFastMathEnabled)
: BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
{
using arm_compute::NEDirectConvolutionLayer;
&output,
padStrideInfo,
arm_compute::WeightsInfo(),
- aclDilationInfo);
+ aclDilationInfo,
+ arm_compute::ActivationLayerInfo(),
+ isFastMathEnabled);
+
+ m_ConvolutionMethod =
+ convolutionLayer->get_convolution_method(input.info(),
+ m_KernelTensor->info(),
+ output.info(),
+ padStrideInfo,
+ arm_compute::WeightsInfo(),
+ aclDilationInfo,
+ arm_compute::ActivationLayerInfo(),
+ isFastMathEnabled);
m_ConvolutionLayer.reset(convolutionLayer.release());
m_ConvolutionLayer->run();
}
+arm_compute::ConvolutionMethod NeonConvolution2dWorkload::GetConvolutionMethod() const
+{
+ return m_ConvolutionMethod;
+}
+
void NeonConvolution2dWorkload::FreeUnusedTensors()
{
FreeTensorIfUnused(m_KernelTensor);
public:
using BaseWorkload<Convolution2dQueueDescriptor>::m_Data;
- NeonConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ NeonConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+                              const bool isFastMathEnabled = false);
void Execute() const override;
+ arm_compute::ConvolutionMethod GetConvolutionMethod() const;
+
private:
std::unique_ptr<arm_compute::IFunction> m_ConvolutionLayer;
std::unique_ptr<arm_compute::Tensor> m_KernelTensor;
std::unique_ptr<arm_compute::Tensor> m_BiasTensor;
+ arm_compute::ConvolutionMethod m_ConvolutionMethod;
+
void FreeUnusedTensors();
};
return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
}
+bool RefWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions)
+{
+ return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
+}
+
std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
const bool isMemoryManaged) const
{
Optional<DataType> dataType,
std::string& outReasonIfUnsupported);
+ static bool IsLayerSupported(const IConnectableLayer& layer,
+ Optional<DataType> dataType,
+ std::string& outReasonIfUnsupported,
+ const ModelOptions& modelOptions);
+
bool SupportsSubTensors() const override { return false; }
ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")