2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
6 #include <armnn/ArmNN.hpp>
10 #include <reference/RefWorkloadFactory.hpp>
12 #include <boost/test/unit_test.hpp>
14 BOOST_AUTO_TEST_SUITE(RefOptimizedNetwork)
16 BOOST_AUTO_TEST_CASE(OptimizeValidateCpuRefWorkloads)
// Builds a small graph (input -> normalization -> activation -> multiplication
// -> softmax -> output), optimizes it for the CpuRef backend, and then checks
// that a reference workload can be created for every layer of the optimized
// graph. NOTE(review): several lines of this test are elided from this view
// (braces, the armnn::Network declaration, check macros); comments describe
// only the visible statements.
18 const armnn::TensorInfo desc({3, 5}, armnn::DataType::Float32);
// Default-constructed descriptors are enough here — the test only exercises
// workload creation, not numerical behavior.
22 armnn::NormalizationDescriptor nmDesc;
23 armnn::ActivationDescriptor acDesc;
// Input layer; every output slot in this test carries the same {3, 5} FP32 info.
36 armnn::IConnectableLayer* layer = net.AddInputLayer(0, "in");
37 layer->GetOutputSlot(0).SetTensorInfo(desc);
39 armnn::IConnectableLayer* const normLayer = net.AddNormalizationLayer(nmDesc, "nm");
41 layer->GetOutputSlot(0).Connect(normLayer->GetInputSlot(0));
42 normLayer->GetOutputSlot(0).SetTensorInfo(desc);
44 layer = net.AddActivationLayer(acDesc, "ac");
46 normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
47 layer->GetOutputSlot(0).SetTensorInfo(desc);
// Multiplication combines the activation output (slot 0) with the
// normalization output (slot 1).
49 armnn::IConnectableLayer* prevLayer = layer;
50 layer = net.AddMultiplicationLayer("ml");
52 prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
53 normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
54 layer->GetOutputSlot(0).SetTensorInfo(desc);
// NOTE(review): in the visible code prevLayer still points at the activation
// layer here; an elided line (around original line 55-56) likely reassigns it
// to the multiplication layer — confirm against the full file.
57 armnn::SoftmaxDescriptor softmaxDescriptor;
58 layer = net.AddSoftmaxLayer(softmaxDescriptor, "sm");
60 prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
61 layer->GetOutputSlot(0).SetTensorInfo(desc);
64 layer = net.AddOutputLayer(0, "ot");
66 prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
// Optimize for the reference backend, then allocate dynamic buffers so the
// optimized graph is fully realized before workload creation is attempted.
68 armnn::IRuntime::CreationOptions options;
69 armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
71 std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
72 armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec());
73 static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph().AllocateDynamicBuffers();
76 // Validates workloads.
77 armnn::RefWorkloadFactory fact;
78 for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
// Each layer must be able to create its reference workload; the surrounding
// check macro (elided from this view) presumably wraps this call.
81 layer->CreateWorkload(static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph(), fact));
85 BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsCpuRefPermuteLayer)
// Verifies that a network containing a Permute layer is assigned entirely to
// the CpuRef backend by the optimizer.
87 // Create runtime in which test will run
88 armnn::IRuntime::CreationOptions options;
89 armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
// Only the reference backend is offered to the optimizer.
91 std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
93 // build up the structure of the network
94 armnn::INetworkPtr net(armnn::INetwork::Create());
96 armnn::IConnectableLayer* input = net->AddInputLayer(0);
// Dimension mapping {0, 2, 3, 1} — the output tensor info below reflects the
// permuted shape of the input.
98 armnn::PermuteDescriptor descriptor({0, 2, 3, 1});
99 armnn::IConnectableLayer* permute = net->AddPermuteLayer(descriptor);
101 armnn::IConnectableLayer* output = net->AddOutputLayer(0);
// Wire input -> permute -> output.
103 input->GetOutputSlot(0).Connect(permute->GetInputSlot(0));
104 permute->GetOutputSlot(0).Connect(output->GetInputSlot(0));
106 input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 1, 4, 4 }, armnn::DataType::Float32));
107 permute->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 1, 4, 1, 4 }, armnn::DataType::Float32));
109 // optimize the network
110 armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
// Every layer in the optimized graph must have been placed on CpuRef.
112 for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
114 BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
118 BOOST_AUTO_TEST_CASE(OptimizeValidateWorkloadsCpuRefMeanLayer)
// Verifies that a network containing a Mean layer is assigned entirely to the
// CpuRef backend by the optimizer.
120 // Create runtime in which test will run
121 armnn::IRuntime::CreationOptions options;
122 armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
// Only the reference backend is offered to the optimizer.
124 std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
126 // build up the structure of the network
127 armnn::INetworkPtr net(armnn::INetwork::Create());
129 armnn::IConnectableLayer* input = net->AddInputLayer(0);
// Reduce over axes 0 and 1 without keeping the reduced dimensions
// (keepDims = false), so {4, 3, 2} collapses to {2} below.
131 armnn::MeanDescriptor descriptor({ 0, 1 }, false);
132 armnn::IConnectableLayer* meanLayer = net->AddMeanLayer(descriptor);
134 armnn::IConnectableLayer* output = net->AddOutputLayer(0);
// Wire input -> mean -> output.
136 input->GetOutputSlot(0).Connect(meanLayer->GetInputSlot(0));
137 meanLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
139 input->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 4, 3, 2 }, armnn::DataType::Float32));
140 meanLayer->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({ 2 }, armnn::DataType::Float32));
142 // optimize the network
143 armnn::IOptimizedNetworkPtr optNet = armnn::Optimize(*net, backends, runtime->GetDeviceSpec());
// Every layer in the optimized graph must have been placed on CpuRef.
145 for (auto&& layer : static_cast<armnn::OptimizedNetwork*>(optNet.get())->GetGraph())
147 BOOST_CHECK(layer->GetBackendId() == armnn::Compute::CpuRef);
151 BOOST_AUTO_TEST_CASE(FP16TurboModeTestOnCpuRef)
153 // Test to check when FP16 Turbo mode set
154 // it converts the FP32 network to FP16 Network
155 // add FP32ToFP16 conversion layer after the InputLayer
156 // add FP16ToFP32 conversion layer after the OutputLayer
157 // checks the other layers if they are supported in FP16
158 // if they are not put the conversion layers before and after
159 // if they are not supported in FP16 use FP32 instead
160 // if there are inverse conversion layers remove them with optimization
161 // at the moment FloorLayer is not supported in FP16 so it rolls back to FP32
162 // and inverse conversion layers are removed by the optimizer
// Build the minimal graph: input -> floor -> output. (The network object's
// declaration is elided from this view.)
166 auto input = net.AddInputLayer(0);
167 auto floor = net.AddFloorLayer();
168 auto output = net.AddOutputLayer(0);
171 input->GetOutputSlot(0).Connect(floor->GetInputSlot(0));
172 floor->GetOutputSlot(0).Connect(output->GetInputSlot(0));
// All tensors are 1-D FP32 of length 4; this shape also appears in the
// expected dot-graph edge labels below.
174 armnn::TensorShape shape({4});
175 armnn::TensorInfo info(shape, armnn::DataType::Float32);
176 input->GetOutputSlot(0).SetTensorInfo(info);
177 floor->GetOutputSlot(0).SetTensorInfo(info);
179 armnn::IRuntime::CreationOptions options;
180 armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
182 std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
// Enable the FP32 -> FP16 reduction; the optimizer-options argument is passed
// on a continuation line elided from this view.
184 armnn::OptimizerOptions optimizerOptions;
185 optimizerOptions.m_ReduceFp32ToFp16 = true;
187 armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(net, backends, runtime->GetDeviceSpec(),
// Serialize the optimized graph to dot format for a textual comparison.
190 std::ostringstream ss;
191 optimizedNet->SerializeToDot(ss);
// Node ids in the dot output are the layers' GUIDs.
193 auto inputId = input->GetGuid();
194 auto floorId = floor->GetGuid();
195 auto outputId = output->GetGuid();
// Expected graph contains only the original three layers — per the comment
// block above, the conversion layers are expected to have been removed by the
// optimizer. (The "expected <<" statement opening is elided from this view.)
197 std::stringstream expected;
199 "digraph Optimized {\n"
200 " node [shape=\"record\"];\n"
201 " edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n"
202 " " << inputId << " [label=\"{Input}\"];\n"
203 " " << floorId << " [label=\"{Floor}\"];\n"
204 " " << outputId << " [label=\"{Output}\"];\n"
205 " " << inputId << " -> " << floorId << " [label=< [4] >];\n"
206 " " << floorId << " -> " << outputId << " [label=< [4] >];\n"
209 BOOST_TEST(ss.str() == expected.str());
212 BOOST_AUTO_TEST_SUITE_END()