src/armnn/layers/Convolution2dLayer.cpp \
src/armnn/layers/ConvertBf16ToFp32Layer.cpp \
src/armnn/layers/ConvertFp16ToFp32Layer.cpp \
+ src/armnn/layers/ConvertFp32ToBf16Layer.cpp \
src/armnn/layers/ConvertFp32ToFp16Layer.cpp \
src/armnn/layers/DebugLayer.cpp \
src/armnn/layers/DepthToSpaceLayer.cpp \
src/armnn/layers/ConvertBf16ToFp32Layer.hpp
src/armnn/layers/ConvertFp16ToFp32Layer.hpp
src/armnn/layers/ConvertFp16ToFp32Layer.cpp
+ src/armnn/layers/ConvertFp32ToBf16Layer.hpp
+ src/armnn/layers/ConvertFp32ToBf16Layer.cpp
src/armnn/layers/ConvertFp32ToFp16Layer.hpp
src/armnn/layers/ConvertFp32ToFp16Layer.cpp
src/armnn/layers/DebugLayer.hpp
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
+ virtual bool IsConvertFp32ToBf16Supported(const TensorInfo& input,
+ const TensorInfo& output,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
+
virtual bool IsConvertFp16ToFp32Supported(const TensorInfo& input,
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
case LayerType::Constant: return "Constant";
case LayerType::ConvertBf16ToFp32: return "ConvertBf16ToFp32";
case LayerType::ConvertFp16ToFp32: return "ConvertFp16ToFp32";
+ case LayerType::ConvertFp32ToBf16: return "ConvertFp32ToBf16";
case LayerType::ConvertFp32ToFp16: return "ConvertFp32ToFp16";
case LayerType::Convolution2d: return "Convolution2d";
case LayerType::Debug: return "Debug";
Constant,
ConvertBf16ToFp32,
ConvertFp16ToFp32,
+ ConvertFp32ToBf16,
ConvertFp32ToFp16,
Convolution2d,
Debug,
#include "layers/ConstantLayer.hpp"
#include "layers/ConvertBf16ToFp32Layer.hpp"
#include "layers/ConvertFp16ToFp32Layer.hpp"
+#include "layers/ConvertFp32ToBf16Layer.hpp"
#include "layers/ConvertFp32ToFp16Layer.hpp"
#include "layers/Convolution2dLayer.hpp"
#include "layers/DebugLayer.hpp"
DECLARE_LAYER(Constant)
DECLARE_LAYER(ConvertBf16ToFp32)
DECLARE_LAYER(ConvertFp16ToFp32)
+DECLARE_LAYER(ConvertFp32ToBf16)
DECLARE_LAYER(ConvertFp32ToFp16)
DECLARE_LAYER(Convolution2d)
DECLARE_LAYER(Debug)
{
public:
/// Makes a workload for the ConvertBf16ToFp32 type.
- /// @param [in] graph The graph where this layer can be found.
/// @param [in] factory The workload factory which will create the workload.
/// @return A pointer to the created workload, or nullptr if not created.
virtual std::unique_ptr<IWorkload> CreateWorkload(const IWorkloadFactory& factory) const override;
{
public:
/// Makes a workload for the ConvertFp16ToFp32 type.
- /// @param [in] graph The graph where this layer can be found.
/// @param [in] factory The workload factory which will create the workload.
/// @return A pointer to the created workload, or nullptr if not created.
virtual std::unique_ptr<IWorkload> CreateWorkload(const IWorkloadFactory& factory) const override;
--- /dev/null
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ConvertFp32ToBf16Layer.hpp"
+#include "LayerCloneBase.hpp"
+
+#include <armnn/TypesUtils.hpp>
+
+#include <backendsCommon/WorkloadData.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+namespace armnn
+{
+
+/// Constructs a ConvertFp32ToBf16Layer.
+/// Forwards (1, 1, LayerType::ConvertFp32ToBf16, name) to the Layer base class; the two
+/// leading 1s are presumably the input and output slot counts (TODO confirm against Layer).
+/// @param [in] name Optional name for the layer.
+ConvertFp32ToBf16Layer::ConvertFp32ToBf16Layer(const char* name)
+    : Layer(1, 1, LayerType::ConvertFp32ToBf16, name)
+{}
+
+/// Makes a workload for the ConvertFp32ToBf16 type.
+/// @param [in] factory The workload factory which will create the workload.
+/// @return Whatever factory.CreateConvertFp32ToBf16 returns for this layer's descriptor and info.
+std::unique_ptr<IWorkload> ConvertFp32ToBf16Layer::CreateWorkload(const IWorkloadFactory& factory) const
+{
+    ConvertFp32ToBf16QueueDescriptor queueDescriptor;
+
+    // PrepInfoAndDesc receives the descriptor and yields the WorkloadInfo handed to the factory.
+    const WorkloadInfo workloadInfo = PrepInfoAndDesc(queueDescriptor);
+
+    return factory.CreateConvertFp32ToBf16(queueDescriptor, workloadInfo);
+}
+
+/// Creates a copy of this layer in the given graph, carrying over the layer name.
+/// @param [in] graph The graph into which this layer is being cloned.
+/// @return Pointer to the clone produced by CloneBase.
+ConvertFp32ToBf16Layer* ConvertFp32ToBf16Layer::Clone(Graph& graph) const
+{
+    auto* const clonedLayer = CloneBase<ConvertFp32ToBf16Layer>(graph, GetName());
+    return clonedLayer;
+}
+
+/// Checks that the shape set on output slot 0 equals the shape inferred from the
+/// tensor connected to input slot 0. A mismatch is reported through
+/// ConditionalThrowIfNotEqual<LayerValidationException>.
+void ConvertFp32ToBf16Layer::ValidateTensorShapesFromInputs()
+{
+    VerifyLayerConnections(1, CHECK_LOCATION());
+
+    // Shape of the tensor feeding the single input slot.
+    const TensorShape& connectedInputShape = GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape();
+
+    const auto inferredShapes = InferOutputShapes({ connectedInputShape });
+
+    // One input shape is expected to yield exactly one inferred output shape.
+    BOOST_ASSERT(inferredShapes.size() == 1);
+
+    const TensorShape& declaredOutputShape = GetOutputSlot(0).GetTensorInfo().GetShape();
+
+    ConditionalThrowIfNotEqual<LayerValidationException>(
+        "ConvertFp32ToBf16Layer: TensorShape set on OutputSlot[0] does not match the inferred shape.",
+        declaredOutputShape,
+        inferredShapes[0]);
+}
+
+/// Visitor entry point; always throws armnn::Exception for this layer type.
+/// @param [in] visitor Unused.
+void ConvertFp32ToBf16Layer::Accept(ILayerVisitor& visitor) const
+{
+    // Conversion layers are only ever inserted by the optimizer, so a visitor
+    // walking an input graph should never come across one of them.
+    IgnoreUnused(visitor);
+
+    throw armnn::Exception("ConvertFp32ToBf16Layer should never appear in an input graph");
+}
+
+} // namespace armnn
--- /dev/null
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <Layer.hpp>
+
+namespace armnn
+{
+
+/// Layer that converts tensor data from Float32 to BFloat16.
+class ConvertFp32ToBf16Layer : public Layer
+{
+public:
+    /// Makes a workload for the ConvertFp32ToBf16 type.
+    /// @param [in] factory The workload factory which will create the workload.
+    /// @return A pointer to the created workload, or nullptr if not created.
+    std::unique_ptr<IWorkload> CreateWorkload(const IWorkloadFactory& factory) const override;
+
+    /// Creates a dynamically-allocated copy of this layer.
+    /// @param [in] graph The graph into which this layer is being cloned.
+    /// @return A pointer to the cloned layer.
+    ConvertFp32ToBf16Layer* Clone(Graph& graph) const override;
+
+    /// Checks whether the input tensor shape(s) lead to a valid
+    /// configuration of @ref ConvertFp32ToBf16Layer.
+    void ValidateTensorShapesFromInputs() override;
+
+    /// Visitor hook for this layer type.
+    /// @param [in] visitor The visitor being applied to the layer.
+    void Accept(ILayerVisitor& visitor) const override;
+
+protected:
+    /// Constructor to create a ConvertFp32ToBf16Layer.
+    /// @param [in] name Optional name for the layer.
+    ConvertFp32ToBf16Layer(const char* name);
+
+    /// Default destructor.
+    ~ConvertFp32ToBf16Layer() = default;
+};
+
+} // namespace armnn
{
public:
/// Makes a workload for the ConvertFp32ToFp16 type.
- /// @param [in] graph The graph where this layer can be found.
/// @param [in] factory The workload factory which will create the workload.
/// @return A pointer to the created workload, or nullptr if not created.
virtual std::unique_ptr<IWorkload> CreateWorkload(const IWorkloadFactory& factory) const override;
return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
}
+/// Base-class answer to the Float32 to BFloat16 conversion support query.
+/// Neither tensor info is inspected; the result is whatever DefaultLayerSupport
+/// returns when given this call site and the optional reason string.
+/// @param [out] reasonIfUnsupported Optional string forwarded to DefaultLayerSupport.
+bool LayerSupportBase::IsConvertFp32ToBf16Supported(const TensorInfo&, // input (unused)
+                                                    const TensorInfo&, // output (unused)
+                                                    Optional<std::string&> reasonIfUnsupported) const
+{
+    return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
+}
+
+
bool LayerSupportBase::IsConvertFp32ToFp16Supported(const TensorInfo& /*input*/,
const TensorInfo& /*output*/,
Optional<std::string&> reasonIfUnsupported) const
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ bool IsConvertFp32ToBf16Supported(const TensorInfo& input,
+ const TensorInfo& output,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
bool IsConvertFp32ToFp16Supported(
const TensorInfo& input,
const TensorInfo& output,
armnn::DataType::Float32>;
template <typename QueueDescriptor>
+using Float32ToBFloat16Workload = MultiTypedWorkload<QueueDescriptor, // Float32 -> BFloat16 workload alias
+ armnn::DataType::Float32, // presumably the input data type - TODO confirm against MultiTypedWorkload
+ armnn::DataType::BFloat16>; // presumably the output data type - TODO confirm against MultiTypedWorkload
+
+template <typename QueueDescriptor>
using Float16ToFloat32Workload = MultiTypedWorkload<QueueDescriptor,
armnn::DataType::Float16,
armnn::DataType::Float32>;
ValidateTensorShapesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output");
}
+/// Validates the workload info for a Float32 to BFloat16 conversion.
+/// Requires one input and one output, a Float32 input, a BFloat16 output and matching shapes.
+/// @param [in] workloadInfo The tensor infos of the workload's inputs and outputs.
+/// @throws InvalidArgumentException if the input is not Float32 or the output is not BFloat16.
+void ConvertFp32ToBf16QueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+    const std::string descriptorName{"ConvertFp32ToBf16QueueDescriptor"};
+
+    ValidateNumInputs(workloadInfo,  descriptorName, 1);
+    ValidateNumOutputs(workloadInfo, descriptorName, 1);
+
+    const TensorInfo& sourceInfo      = workloadInfo.m_InputTensorInfos[0];
+    const TensorInfo& destinationInfo = workloadInfo.m_OutputTensorInfos[0];
+
+    // The conversion only makes sense from Float32 ...
+    const bool sourceIsFloat32 = (sourceInfo.GetDataType() == DataType::Float32);
+    if (!sourceIsFloat32)
+    {
+        throw InvalidArgumentException(descriptorName + ": Input tensor type must be Float32.");
+    }
+
+    // ... to BFloat16.
+    const bool destinationIsBFloat16 = (destinationInfo.GetDataType() == DataType::BFloat16);
+    if (!destinationIsBFloat16)
+    {
+        throw InvalidArgumentException(descriptorName + ": Output tensor type must be BFloat16.");
+    }
+
+    ValidateTensorShapesMatch(sourceInfo, destinationInfo, descriptorName, "input", "output");
+}
+
void ConvertFp32ToFp16QueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
{
const std::string descriptorName{"ConvertFp32ToFp16QueueDescriptor"};
void Validate(const WorkloadInfo& workloadInfo) const;
};
+struct ConvertFp32ToBf16QueueDescriptor : QueueDescriptor // Queue descriptor for the Float32 -> BFloat16 conversion.
+{
+ void Validate(const WorkloadInfo& workloadInfo) const; // Checks workloadInfo against this descriptor's requirements.
+};
+
struct ConvertFp16ToFp32QueueDescriptor : QueueDescriptor
{
void Validate(const WorkloadInfo& workloadInfo) const;
result = layerSupportObject->IsConvertFp16ToFp32Supported(input, output, reason);
break;
}
+ case LayerType::ConvertFp32ToBf16:
+ {
+ const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
+ const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
+ result = layerSupportObject->IsConvertFp32ToBf16Supported(input, output, reason);
+ break;
+ }
case LayerType::ConvertFp32ToFp16:
{
const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
return std::unique_ptr<IWorkload>();
}
+/// Default implementation: ignores both arguments and returns an empty (null) workload pointer.
+std::unique_ptr<IWorkload> IWorkloadFactory::CreateConvertFp32ToBf16(const ConvertFp32ToBf16QueueDescriptor&,
+                                                                     const WorkloadInfo&) const
+{
+    return nullptr;
+}
+
std::unique_ptr<IWorkload> IWorkloadFactory::CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& /*desc*/,
const WorkloadInfo& /*info*/) const
{
virtual std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor,
const WorkloadInfo& info) const;
+ virtual std::unique_ptr<IWorkload> CreateConvertFp32ToBf16(const ConvertFp32ToBf16QueueDescriptor& descriptor,
+ const WorkloadInfo& info) const;
+
virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
const WorkloadInfo& info) const;
test/layerTests/Conv2dTestImpl.cpp \
test/layerTests/ConvertBf16ToFp32TestImpl.cpp \
test/layerTests/ConvertFp16ToFp32TestImpl.cpp \
+ test/layerTests/ConvertFp32ToBf16TestImpl.cpp \
test/layerTests/ConvertFp32ToFp16TestImpl.cpp \
test/layerTests/DebugTestImpl.cpp \
test/layerTests/DepthToSpaceTestImpl.cpp \
layerTests/ConvertBf16ToFp32TestImpl.hpp
layerTests/ConvertFp16ToFp32TestImpl.cpp
layerTests/ConvertFp16ToFp32TestImpl.hpp
+ layerTests/ConvertFp32ToBf16TestImpl.cpp
+ layerTests/ConvertFp32ToBf16TestImpl.hpp
layerTests/ConvertFp32ToFp16TestImpl.cpp
layerTests/ConvertFp32ToFp16TestImpl.hpp
layerTests/DebugTestImpl.cpp
DECLARE_LAYER_POLICY_1_PARAM(ConvertFp16ToFp32)
+DECLARE_LAYER_POLICY_1_PARAM(ConvertFp32ToBf16)
+
DECLARE_LAYER_POLICY_1_PARAM(ConvertFp32ToFp16)
DECLARE_LAYER_POLICY_2_PARAM(Convolution2d)
#include <backendsCommon/test/layerTests/ConcatTestImpl.hpp>
#include <backendsCommon/test/layerTests/ConvertBf16ToFp32TestImpl.hpp>
#include <backendsCommon/test/layerTests/ConvertFp16ToFp32TestImpl.hpp>
+#include <backendsCommon/test/layerTests/ConvertFp32ToBf16TestImpl.hpp>
#include <backendsCommon/test/layerTests/ConvertFp32ToFp16TestImpl.hpp>
#include <backendsCommon/test/layerTests/Conv2dTestImpl.hpp>
#include <backendsCommon/test/layerTests/ConstantTestImpl.hpp>
--- /dev/null
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ConvertFp32ToBf16TestImpl.hpp"
+
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+/// Runs a ConvertFp32ToBf16 workload from the given factory over a fixed 1x2x4x3 Float32 tensor.
+/// The last twelve inputs carry their Float32 bit patterns in comments and exercise rounding
+/// in both directions; the expected BFloat16 bit patterns are annotated alongside.
+/// @param [in] workloadFactory Factory used to create the tensor handles and the workload.
+/// @param [in] memoryManager   Unused.
+/// @return The test result holding both the expected and the actual BFloat16 output.
+LayerTestResult<armnn::BFloat16, 4> ConvertFp32ToBf16Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    IgnoreUnused(memoryManager);
+
+    const armnn::TensorInfo fp32Info({1, 2, 4, 3}, armnn::DataType::Float32);
+    const armnn::TensorInfo bf16Info({1, 2, 4, 3}, armnn::DataType::BFloat16);
+
+    // Float32 source values.
+    auto fp32Input = MakeTensor<float, 4>(fp32Info,
+        {
+            -37.5f, -15.2f, -8.76f,
+            -2.0f, -1.5f, -1.3f,
+            -0.5f, -0.4f, 0.0f,
+            1.0f, 0.4f, 0.5f,
+            1.3f, 1.5f, 2.0f,
+            8.76f, 15.2f, 37.5f,
+            3.8f,           // 0x40733333 Round down
+            3.1055E+29f,    // 0x707ADC3C Round up
+            9.149516E-10f,  // 0x307B7FFF Round down
+            -3.8f,          // 0xC0733333 Round down
+            -3.1055E+29f,   // 0xF07ADC3C Round up
+            -9.149516E-10f  // 0xB07B7FFF Round down
+        });
+
+    // Expected BFloat16 values, built from floats with scale 1.0 and offset 0.
+    std::vector<armnn::BFloat16> expectedBf16 = armnnUtils::QuantizedVector<armnn::BFloat16>(
+        {
+            -37.5f, -15.2f, -8.76f,
+            -2.0f, -1.5f, -1.3f,
+            -0.5f, -0.4f, 0.0f,
+            1.0f, 0.4f, 0.5f,
+            1.3f, 1.5f, 2.0f,
+            8.76f, 15.2f, 37.5f,
+            3.796875f,      // 0x4073
+            3.1072295E29f,  // 0x707B
+            9.131327E-10f,  // 0x307B
+            -3.796875f,     // 0xC073
+            -3.1072295E29f, // 0xF07B
+            -9.131327E-10f  // 0xB07B
+        },
+        1.0f, 0);
+
+    LayerTestResult<armnn::BFloat16, 4> testResult(bf16Info);
+    testResult.outputExpected = MakeTensor<armnn::BFloat16, 4>(bf16Info, expectedBf16);
+
+    // Backend tensor handles for the source and destination tensors.
+    std::unique_ptr<armnn::ITensorHandle> fp32Handle = workloadFactory.CreateTensorHandle(fp32Info);
+    std::unique_ptr<armnn::ITensorHandle> bf16Handle = workloadFactory.CreateTensorHandle(bf16Info);
+
+    // Describe the workload and create it through the factory under test.
+    armnn::ConvertFp32ToBf16QueueDescriptor queueDescriptor;
+    armnn::WorkloadInfo workloadInfo;
+    AddInputToWorkload(queueDescriptor, workloadInfo, fp32Info, fp32Handle.get());
+    AddOutputToWorkload(queueDescriptor, workloadInfo, bf16Info, bf16Handle.get());
+
+    std::unique_ptr<armnn::IWorkload> conversionWorkload =
+        workloadFactory.CreateConvertFp32ToBf16(queueDescriptor, workloadInfo);
+
+    fp32Handle->Allocate();
+    bf16Handle->Allocate();
+
+    CopyDataToITensorHandle(fp32Handle.get(), &fp32Input[0][0][0][0]);
+
+    conversionWorkload->Execute();
+
+    CopyDataFromITensorHandle(&testResult.output[0][0][0][0], bf16Handle.get());
+
+    return testResult;
+}
--- /dev/null
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <BFloat16.hpp>
+
+#include <armnn/backends/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+LayerTestResult<armnn::BFloat16, 4> ConvertFp32ToBf16Test(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
&FalseFuncU8<>));
}
+/// Reference backend support query for the Float32 to BFloat16 conversion.
+/// Both rules are always evaluated, so each failing rule gets the chance to report its reason.
+/// @param [in]  input               Tensor info that must be of type Float32.
+/// @param [in]  output              Tensor info that must be of type BFloat16.
+/// @param [out] reasonIfUnsupported Optional string passed to CheckSupportRule with each rule's message.
+/// @return true only if both type rules pass.
+bool RefLayerSupport::IsConvertFp32ToBf16Supported(const TensorInfo& input,
+                                                   const TensorInfo& output,
+                                                   Optional<std::string&> reasonIfUnsupported) const
+{
+    const bool inputTypeOk = CheckSupportRule(TypeIs(input, DataType::Float32), reasonIfUnsupported,
+                                              "Reference for ConvertFp32ToBf16 layer: input type not supported");
+
+    const bool outputTypeOk = CheckSupportRule(TypeIs(output, DataType::BFloat16), reasonIfUnsupported,
+                                               "Reference for ConvertFp32ToBf16 layer: output type not supported");
+
+    return inputTypeOk && outputTypeOk;
+}
+
bool RefLayerSupport::IsConvertFp32ToFp16Supported(const TensorInfo& input,
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported) const
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ bool IsConvertFp32ToBf16Supported(const TensorInfo& input,
+ const TensorInfo& output,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
bool IsConvertFp32ToFp16Supported(const TensorInfo& input,
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
return std::make_unique<RefConvertFp16ToFp32Workload>(descriptor, info);
}
+/// Creates the reference workload for the Float32 to BFloat16 conversion.
+/// @param [in] descriptor Queue descriptor forwarded to the workload constructor.
+/// @param [in] info       Workload info forwarded to the workload constructor.
+/// @return A newly created RefConvertFp32ToBf16Workload.
+std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConvertFp32ToBf16(
+    const ConvertFp32ToBf16QueueDescriptor& descriptor,
+    const WorkloadInfo& info) const
+{
+    std::unique_ptr<IWorkload> refWorkload = std::make_unique<RefConvertFp32ToBf16Workload>(descriptor, info);
+    return refWorkload;
+}
+
std::unique_ptr<IWorkload> RefWorkloadFactory::CreateConvertFp32ToFp16(
const ConvertFp32ToFp16QueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+ std::unique_ptr<IWorkload> CreateConvertFp32ToBf16(const ConvertFp32ToBf16QueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
workloads/RefConstantWorkload.cpp \
workloads/RefConvertBf16ToFp32Workload.cpp \
workloads/RefConvertFp16ToFp32Workload.cpp \
+ workloads/RefConvertFp32ToBf16Workload.cpp \
workloads/RefConvertFp32ToFp16Workload.cpp \
workloads/RefConvolution2dWorkload.cpp \
workloads/RefDebugWorkload.cpp \
BOOST_CHECK_EQUAL(reasonIfUnsupported, "Reference for ConvertBf16ToFp32 layer: output type not supported\n");
}
+// Float32 input with BFloat16 output is expected to be reported as supported.
+BOOST_AUTO_TEST_CASE(IsConvertFp32ToBf16SupportedReference)
+{
+    std::string reason;
+
+    const bool isSupported =
+        IsConvertLayerSupportedTests<armnn::RefWorkloadFactory, armnn::ConvertFp32ToBf16Layer,
+                                     armnn::DataType::Float32, armnn::DataType::BFloat16>(reason);
+
+    BOOST_CHECK(isSupported);
+}
+
+// A BFloat16 input is expected to be rejected with the input-type reason.
+BOOST_AUTO_TEST_CASE(IsConvertFp32ToBf16SupportedBf16InputReference)
+{
+    std::string reason;
+
+    const bool isSupported =
+        IsConvertLayerSupportedTests<armnn::RefWorkloadFactory, armnn::ConvertFp32ToBf16Layer,
+                                     armnn::DataType::BFloat16, armnn::DataType::BFloat16>(reason);
+
+    BOOST_CHECK(!isSupported);
+    BOOST_CHECK_EQUAL(reason, "Reference for ConvertFp32ToBf16 layer: input type not supported\n");
+}
+
+// A Float32 output is expected to be rejected with the output-type reason.
+BOOST_AUTO_TEST_CASE(IsConvertFp32ToBf16SupportedFp32OutputReference)
+{
+    std::string reason;
+
+    const bool isSupported =
+        IsConvertLayerSupportedTests<armnn::RefWorkloadFactory, armnn::ConvertFp32ToBf16Layer,
+                                     armnn::DataType::Float32, armnn::DataType::Float32>(reason);
+
+    BOOST_CHECK(!isSupported);
+    BOOST_CHECK_EQUAL(reason, "Reference for ConvertFp32ToBf16 layer: output type not supported\n");
+}
+
BOOST_AUTO_TEST_CASE(IsConvertFp32ToFp16SupportedReference)
{
std::string reasonIfUnsupported;
// Convert from BFloat16 to Float32
ARMNN_AUTO_TEST_CASE(ConvertBf16ToFp32, ConvertBf16ToFp32Test)
+// Convert from Float32 to BFloat16
+ARMNN_AUTO_TEST_CASE(ConvertFp32ToBf16, ConvertFp32ToBf16Test)
+
// Convert from Float16 to Float32
ARMNN_AUTO_TEST_CASE(SimpleConvertFp16ToFp32, SimpleConvertFp16ToFp32Test)
// Convert from Float32 to Float16
RefConvertBf16ToFp32Workload.hpp
RefConvertFp16ToFp32Workload.cpp
RefConvertFp16ToFp32Workload.hpp
+ RefConvertFp32ToBf16Workload.cpp
+ RefConvertFp32ToBf16Workload.hpp
RefConvertFp32ToFp16Workload.cpp
RefConvertFp32ToFp16Workload.hpp
RefConvolution2dWorkload.cpp
--- /dev/null
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefConvertFp32ToBf16Workload.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include <armnnUtils/FloatingPointConverter.hpp>
+
+#include <BFloat16.hpp>
+
+namespace armnn
+{
+
+/// Converts every element of input 0 from Float32 to BFloat16 and writes the result to output 0.
+/// The element count is taken from the tensor info of input 0.
+void RefConvertFp32ToBf16Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefConvertFp32ToBf16Workload_Execute");
+
+    const unsigned int elementCount = GetTensorInfo(m_Data.m_Inputs[0]).GetNumElements();
+
+    const float* const source      = GetInputTensorDataFloat(0, m_Data);
+    BFloat16* const    destination = GetOutputTensorDataBFloat16(0, m_Data);
+
+    armnnUtils::FloatingPointConverter::ConvertFloat32ToBFloat16(source, elementCount, destination);
+}
+
+} //namespace armnn
--- /dev/null
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+
+namespace armnn
+{
+
+/// Reference workload that converts a Float32 tensor to BFloat16.
+class RefConvertFp32ToBf16Workload : public Float32ToBFloat16Workload<ConvertFp32ToBf16QueueDescriptor>
+{
+public:
+    /// Inherits the constructors of the Float32ToBFloat16Workload base.
+    using Float32ToBFloat16Workload<ConvertFp32ToBf16QueueDescriptor>::Float32ToBFloat16Workload;
+
+    /// Performs the conversion.
+    void Execute() const override;
+};
+
+} //namespace armnn
return GetInputTensorData<BFloat16>(idx, data);
}
+/// Convenience wrapper around GetOutputTensorData for BFloat16 outputs.
+/// @param [in] idx  Index of the output, forwarded unchanged.
+/// @param [in] data Payload forwarded unchanged to GetOutputTensorData.
+/// @return The BFloat16 pointer returned by GetOutputTensorData<BFloat16>.
+template <typename PayloadType>
+BFloat16* GetOutputTensorDataBFloat16(unsigned int idx, const PayloadType& data)
+{
+    BFloat16* const outputData = GetOutputTensorData<BFloat16>(idx, data);
+    return outputData;
+}
+
////////////////////////////////////////////
/// u8 helpers
////////////////////////////////////////////
#include "RefConcatWorkload.hpp"
#include "RefConvertBf16ToFp32Workload.hpp"
#include "RefConvertFp16ToFp32Workload.hpp"
+#include "RefConvertFp32ToBf16Workload.hpp"
#include "RefConvertFp32ToFp16Workload.hpp"
#include "RefDebugWorkload.hpp"
#include "RefDepthToSpaceWorkload.hpp"