1 // Copyright (C) 2020 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "ngraph_functions/low_precision_transformations/convolution_function.hpp"
7 #include <ngraph/opsets/opset1.hpp>
8 #include <ngraph_ops/type_relaxed.hpp>
9 #include "ngraph_functions/subgraph_builders.hpp"
10 #include "low_precision/network_helper.hpp"
12 #include "ngraph_functions/low_precision_transformations/common/fake_quantize_on_weights.hpp"
13 #include "ngraph_functions/low_precision_transformations/common/fake_quantize_on_data.hpp"
14 #include "ngraph_functions/low_precision_transformations/common/dequantization_operations.hpp"
15 #include "ngraph_functions/low_precision_transformations/common/builders.hpp"
16 #include "low_precision/common/dequantization_op.hpp"
17 #include "low_precision/network_helper.hpp"
19 using namespace ngraph::pass::low_precision;
// Builds the "original" (pre-low-precision-transformation) test subgraph:
//   Parameter -> dequantization ops -> Convolution, with the weights constant
//   optionally wrapped in a FakeQuantize. Output channel count is fixed to
//   2 * input channels; kernel is 1x1, stride 1, no padding.
// NOTE(review): this view appears to have lines elided (some closing braces of
// the `if` blocks below are not visible) — comments only, code untouched.
25 std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginal(
26 const ngraph::element::Type inputPrecision,
27 const ngraph::Shape& inputShape,
28 const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
29 std::shared_ptr<ngraph::opset1::Constant> weights,
30 const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights) {
31 const auto input = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
32 const auto dequantization = makeDequantization(input, dequantizationBefore);
// Assumes NCHW layout: channels are dimension 1 — TODO confirm against callers.
34 const size_t inputChannelsCount = inputShape[1];
35 const size_t outputChannelsCount = 2 * inputShape[1];
// Weights must be either a single scalar (broadcast below) or one value per
// 1x1 kernel element (outputChannels * inputChannels).
37 if ((weights->cast_vector<float>().size() != 1ul) && (weights->cast_vector<float>().size() != (inputChannelsCount * outputChannelsCount))) {
38 THROW_IE_EXCEPTION << "unexpected actual weights values size";
// Scalar weights: constant-fold a Broadcast to the full OIHW shape {O, I, 1, 1}.
41 if (weights->cast_vector<float>().size() == 1ul) {
42 auto targetShape = ngraph::Shape{ outputChannelsCount, inputChannelsCount, 1, 1 };
43 weights = as_type_ptr<ngraph::opset1::Constant>(fold<ngraph::opset1::Broadcast>(
44 weights, op::Constant::create(ngraph::element::i64, Shape{ targetShape.size() }, targetShape)));
// Feed weights through a FakeQuantize unless the test asked for none.
47 const auto onWeights = fakeQuantizeOnWeights.empty() ? weights :
48 ngraph::builder::makeFakeQuantize(
49 weights, weights->get_element_type(),
50 fakeQuantizeOnWeights.quantizationLevel,
51 fakeQuantizeOnWeights.constantShape,
52 fakeQuantizeOnWeights.inputLowValues,
53 fakeQuantizeOnWeights.inputHighValues,
54 fakeQuantizeOnWeights.outputLowValues,
55 fakeQuantizeOnWeights.outputHighValues);
// Build a prototype Convolution with both inputs coerced to f32, then clone it
// as TypeRelaxed so mixed-precision inputs are accepted while the op itself
// computes in f32.
57 auto convolutionOriginal = ngraph::opset1::Convolution(
58 ngraph::op::TemporaryReplaceOutputType(dequantization, element::f32).get(),
59 ngraph::op::TemporaryReplaceOutputType(onWeights, element::f32).get(),
60 ngraph::Strides{ 1, 1 },
61 ngraph::CoordinateDiff{ 0, 0 },
62 ngraph::CoordinateDiff{ 0, 0 },
63 ngraph::Strides{ 1, 1 });
64 std::shared_ptr<ngraph::opset1::Convolution> convolution = std::make_shared<ngraph::op::TypeRelaxed<ngraph::opset1::Convolution>>(
66 std::vector<element::Type>{ element::f32, element::f32 },
67 std::vector<element::Type>{});
68 convolution->set_friendly_name("output");
// Tag the node in runtime info so tests can identify it after transformations.
69 auto& rtInfo = convolution->get_rt_info();
70 rtInfo["Variant::std::string"] = std::make_shared<VariantWrapper<std::string>>("convolution");
72 ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(convolution) };
73 return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "ConvolutionTransformation");
// Builds an "original" subgraph whose weights branch is deliberately malformed
// for negative testing: when the (elided-from-view) `isCorrect` flag is false,
// a DequantizationSubtract of constant 3.0 is inserted after the weights
// FakeQuantize, which the transformation is expected to reject.
// NOTE(review): the parameter list is truncated in this view — `isCorrect` is
// used below but its declaration is not visible; presumably a trailing
// `const bool isCorrect` parameter. Verify against the header.
76 std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginalWithIncorrectWeights(
77 const ngraph::Shape& inputShape,
78 ngraph::element::Type precision,
79 ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights,
80 ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData,
82 const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));
// Optional FakeQuantize on the activation path (skipped when empty).
83 const auto fqOnData = fakeQuantizeOnData.empty() ?
85 ngraph::builder::makeFakeQuantize(
86 input, precision, fakeQuantizeOnData.quantizationLevel, fakeQuantizeOnData.constantShape,
87 fakeQuantizeOnData.inputLowValues, fakeQuantizeOnData.inputHighValues, fakeQuantizeOnData.outputLowValues, fakeQuantizeOnData.outputHighValues);
// Channels from dimension 1 (NCHW assumed); all-ones 1x1 weights, O = 2 * I.
89 const size_t inputChannelsCount = inputShape[1];
90 const size_t outputChannelsCount = 2 * inputShape[1];
91 const auto weights = ngraph::opset1::Constant::create(
93 ngraph::Shape{ outputChannelsCount, inputChannelsCount, 1, 1 },
94 std::vector<float>(outputChannelsCount * inputChannelsCount, 1));
96 const auto fqOnWeights = fakeQuantizeOnWeights.empty() ?
98 ngraph::builder::makeFakeQuantize(
99 weights, precision, fakeQuantizeOnWeights.quantizationLevel, fakeQuantizeOnWeights.constantShape,
100 fakeQuantizeOnWeights.inputLowValues, fakeQuantizeOnWeights.inputHighValues,
101 fakeQuantizeOnWeights.outputLowValues, fakeQuantizeOnWeights.outputHighValues);
// The "incorrect" part: an extra Subtract(3.0) after FQ on weights that makes
// the weights path non-quantizable for the transformation under test.
103 const auto subtract = isCorrect ? nullptr : std::make_shared<DequantizationSubtract>(fqOnWeights,
104 std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, Shape{1, 1, 1, 1}, 3.0f));
106 const auto convolution = std::make_shared<ngraph::opset1::Convolution>(
107 fakeQuantizeOnData.empty() ? input : fqOnData,
108 isCorrect ? fqOnWeights : subtract,
109 ngraph::Strides{ 1, 1 },
110 ngraph::CoordinateDiff{ 0, 0 },
111 ngraph::CoordinateDiff{ 0, 0 },
112 ngraph::Strides{ 1, 1 });
114 ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(convolution) };
115 return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "IncorrectWeightsAndConvolutionFunction");
// Builds the expected ("reference") graph for the incorrect-weights scenario,
// i.e. what the low-precision transformation should produce: FQ on data with
// its output precision forced to `dataPrecision`, optional dequantization
// before/after the TypeRelaxed convolution, and weights folded to
// `weightsPrecision` via a constant-folded Convert.
// NOTE(review): several lines are elided in this view — the `isCorrect`
// parameter declaration, the conditions guarding the replace_node calls, and
// closing braces. Comments only; code untouched.
118 std::shared_ptr<ngraph::Function> ConvolutionFunction::getReferenceWithIncorrectWeights(
119 const ngraph::Shape& inputShape,
120 ngraph::element::Type precision,
121 ngraph::element::Type dataPrecision,
122 ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeOnData,
123 ngraph::builder::subgraph::DequantizationOperations dequantizationBefore,
124 ngraph::element::Type weightsPrecision,
125 std::vector<float> weightsValues,
126 ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights,
127 ngraph::builder::subgraph::DequantizationOperations dequantizationAfter,
129 const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));
130 input->set_friendly_name("input");
132 std::shared_ptr<ngraph::opset1::FakeQuantize> fqOnData = as_type_ptr<ngraph::opset1::FakeQuantize>(ngraph::builder::makeFakeQuantize(
135 fakeQuantizeOnData.quantizationLevel,
136 fakeQuantizeOnData.constantShape,
137 fakeQuantizeOnData.inputLowValues,
138 fakeQuantizeOnData.inputHighValues,
139 fakeQuantizeOnData.outputLowValues,
140 fakeQuantizeOnData.outputHighValues));
// Force the FQ output element type to the transformed (low) data precision.
142 ngraph::pass::low_precision::NetworkHelper::setOutDataPrecision(fqOnData, dataPrecision);
144 const auto deqBefore = dequantizationBefore.empty() ? nullptr : makeDequantization(fqOnData, dequantizationBefore);
// NCHW assumed; weights are either one broadcast scalar or O*I values.
146 const size_t inputChannelsCount = inputShape[1];
147 const size_t outputChannelsCount = 2 * inputShape[1];
149 if ((weightsValues.size() != 1ul) && (weightsValues.size() != (inputChannelsCount * outputChannelsCount))) {
150 THROW_IE_EXCEPTION << "unexpected actual weights values size";
153 const std::shared_ptr<ngraph::Node> weights = ngraph::opset1::Constant::create(
155 ngraph::Shape{ outputChannelsCount, inputChannelsCount, 1, 1 },
156 weightsValues.size() == 1ul ?
157 std::vector<float>(outputChannelsCount * inputChannelsCount, weightsValues[0]) :
160 const auto fqOnWeights = fakeQuantizeOnWeights.empty() ?
162 ngraph::builder::makeFakeQuantize(
163 weights, precision, fakeQuantizeOnWeights.quantizationLevel, fakeQuantizeOnWeights.constantShape,
164 fakeQuantizeOnWeights.inputLowValues, fakeQuantizeOnWeights.inputHighValues,
165 fakeQuantizeOnWeights.outputLowValues, fakeQuantizeOnWeights.outputHighValues);
// Mirrors the "incorrect" Subtract(3.0) from the original graph when the
// transformation is expected to leave the graph untouched.
167 const auto subtract = isCorrect ? nullptr : std::make_shared<DequantizationSubtract>(fqOnWeights,
168 std::make_shared<ngraph::opset1::Constant>(precision, Shape{ 1, 1, 1, 1 }, 3.0f));
// Prototype Convolution with f32-coerced inputs, cloned as TypeRelaxed below.
170 auto convolutionOriginal = ngraph::opset1::Convolution(
171 ngraph::op::TemporaryReplaceOutputType(dequantizationBefore.empty() ? fqOnData : deqBefore, element::f32).get(),
172 ngraph::op::TemporaryReplaceOutputType(isCorrect ? weights : subtract, element::f32).get(),
173 ngraph::Strides{ 1, 1 },
174 ngraph::CoordinateDiff{ 0, 0 },
175 ngraph::CoordinateDiff{ 0, 0 },
176 ngraph::Strides{ 1, 1 });
178 std::shared_ptr<ngraph::opset1::Convolution> convolution = std::make_shared<ngraph::op::TypeRelaxed<ngraph::opset1::Convolution>>(
180 std::vector<element::Type>{ element::f32, element::f32 },
181 std::vector<element::Type>{});
// Dequantization Multiply after the convolution, when the test expects one.
183 std::shared_ptr<ngraph::Node> multiply;
184 if (!dequantizationAfter.multiply.empty()) {
185 ngraph::Shape constShape = isCorrect ? Shape{ 1, 1, 1 } : Shape{ 1, 1, 1, 1 };
186 multiply = std::make_shared<DequantizationMultiply>(convolution,
187 std::make_shared<ngraph::opset1::Constant>(precision, constShape, dequantizationAfter.multiply.values[0]));
// Rewrite FQ output-low/high constants (inputs 3 and 4) as scalar constants in
// `precision`, then re-apply the low data precision to the FQ output.
190 replace_node(fqOnData->get_input_node_shared_ptr(3),
191 std::make_shared<ngraph::opset1::Constant>(precision, Shape{}, fakeQuantizeOnData.outputLowValues[0]));
193 replace_node(fqOnData->get_input_node_shared_ptr(4),
194 std::make_shared<ngraph::opset1::Constant>(precision, Shape{}, fakeQuantizeOnData.outputHighValues[0]));
196 ngraph::pass::low_precision::NetworkHelper::setOutDataPrecision(fqOnData, dataPrecision);
// Replace the Multiply constant of the pre-conv dequantization in `precision`.
198 if (!dequantizationBefore.multiply.empty()) {
199 ngraph::Shape constShape = isCorrect ? Shape{ 1, 1, 1 } : Shape{ 1, 1, 1, 1 };
201 deqBefore->get_input_node_shared_ptr(1),
202 std::make_shared<ngraph::opset1::Constant>(precision, constShape, dequantizationBefore.multiply.values[0]));
// Constant-fold a Convert of the weights to the target weights precision.
208 ngraph::pass::low_precision::fold<ngraph::opset1::Convert>(weights, weightsPrecision));
211 ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(dequantizationAfter.empty() ? convolution : multiply) };
212 return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "IncorrectWeightsAndConvolutionFunction");
// Builds the expected ("reference") graph after the convolution low-precision
// transformation: dequantization before the conv, TypeRelaxed Convolution with
// its output precision forced to `precisionAfterOperation`, and dequantization
// after it; the post-dequantization node carries the "output" friendly name.
// NOTE(review): some closing braces are elided in this view — comments only.
215 std::shared_ptr<ngraph::Function> ConvolutionFunction::getReference(
216 const ngraph::element::Type inputPrecision,
217 const ngraph::Shape& inputShape,
218 const ngraph::builder::subgraph::DequantizationOperations& dequantizationBefore,
219 std::shared_ptr<ngraph::opset1::Constant> weights,
220 const ngraph::builder::subgraph::FakeQuantizeOnWeights fakeQuantizeOnWeights,
221 const ngraph::element::Type precisionAfterOperation,
222 const ngraph::builder::subgraph::DequantizationOperations& dequantizationAfter,
223 const ngraph::element::Type precisionAfterDequantization) {
224 auto input = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape);
225 const auto deqBefore = makeDequantization(input, dequantizationBefore);
// NCHW assumed; output channels fixed at 2 * input channels, as in getOriginal.
227 const size_t inputChannelsCount = inputShape[1];
228 const size_t outputChannelsCount = 2 * inputShape[1];
230 if ((weights->cast_vector<float>().size() != 1ul) && (weights->cast_vector<float>().size() != (inputChannelsCount * outputChannelsCount))) {
231 THROW_IE_EXCEPTION << "unexpected actual weights values size";
// Scalar weights: constant-fold a Broadcast to the full OIHW shape {O, I, 1, 1}.
234 if (weights->cast_vector<float>().size() == 1ul) {
235 auto targetShape = ngraph::Shape{ outputChannelsCount, inputChannelsCount, 1, 1 };
236 weights = as_type_ptr<ngraph::opset1::Constant>(fold<ngraph::opset1::Broadcast>(
237 weights, op::Constant::create(ngraph::element::i64, Shape{ targetShape.size() }, targetShape)));
// Weights go through FakeQuantize unless the test asked for none.
240 std::shared_ptr<ngraph::Node> onWeights = fakeQuantizeOnWeights.empty() ?
241 std::dynamic_pointer_cast<ngraph::Node>(weights) :
242 ngraph::builder::makeFakeQuantize(
244 weights->get_element_type(),
245 fakeQuantizeOnWeights.quantizationLevel,
246 fakeQuantizeOnWeights.constantShape,
247 fakeQuantizeOnWeights.inputLowValues,
248 fakeQuantizeOnWeights.inputHighValues,
249 fakeQuantizeOnWeights.outputLowValues,
250 fakeQuantizeOnWeights.outputHighValues);
// Prototype Convolution with f32-coerced inputs, cloned as TypeRelaxed below.
252 auto convolutionOriginal = ngraph::opset1::Convolution(
253 ngraph::op::TemporaryReplaceOutputType(deqBefore, element::f32).get(),
254 ngraph::op::TemporaryReplaceOutputType(onWeights, element::f32).get(),
255 ngraph::Strides{ 1, 1 },
256 ngraph::CoordinateDiff{ 0, 0 },
257 ngraph::CoordinateDiff{ 0, 0 },
258 ngraph::Strides{ 1, 1 });
260 std::shared_ptr<ngraph::opset1::Convolution> convolution = std::make_shared<ngraph::op::TypeRelaxed<ngraph::opset1::Convolution>>(
262 std::vector<element::Type>{ element::f32, element::f32 },
263 std::vector<element::Type>{});
// Force the conv output element type to the precision expected after the pass.
265 ngraph::pass::low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(convolution, precisionAfterOperation);
// Tag the node in runtime info so tests can identify it after transformations.
266 auto& rtInfo = convolution->get_rt_info();
267 rtInfo["Variant::std::string"] = std::make_shared<VariantWrapper<std::string>>("convolution");
269 const auto deqAfter = makeDequantization(convolution, dequantizationAfter);
270 deqAfter->set_friendly_name("output");
272 ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(deqAfter) };
273 return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "ConvolutionTransformation");
// Builds a plain test subgraph: Parameter [-> FakeQuantize] -> TypeRelaxed
// Convolution with constant weights [-> FakeQuantize]. Weights are a single
// scalar broadcast to all O*I 1x1 kernel values, or one value per element.
// NOTE(review): some closing braces and continuation lines are elided in this
// view — comments only, code untouched.
276 std::shared_ptr<ngraph::Function> ConvolutionFunction::get(
277 const ngraph::Shape& inputShape,
278 const ngraph::element::Type precision,
279 const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData,
280 const std::vector<float>& weightsValues,
281 const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights) {
282 const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));
283 input->set_friendly_name("input");
285 const std::shared_ptr<ngraph::opset1::FakeQuantize> fqOnData = as_type_ptr<ngraph::opset1::FakeQuantize>(ngraph::builder::makeFakeQuantize(
288 fakeQuantizeOnData.quantizationLevel,
289 fakeQuantizeOnData.constantShape,
290 fakeQuantizeOnData.inputLowValues,
291 fakeQuantizeOnData.inputHighValues,
292 fakeQuantizeOnData.outputLowValues,
293 fakeQuantizeOnData.outputHighValues));
// NCHW assumed; output channels fixed at 2 * input channels.
295 const size_t inputChannelsCount = inputShape[1];
296 const size_t outputChannelsCount = 2 * inputShape[1];
297 if ((weightsValues.size() != 1ul) && (weightsValues.size() != (inputChannelsCount * outputChannelsCount))) {
298 THROW_IE_EXCEPTION << "unexpected actual weights values size";
// Data input of the conv: raw Parameter when no FQ-on-data was requested.
301 const std::shared_ptr<ngraph::Node> parentOnData = fakeQuantizeOnData.empty() ? std::dynamic_pointer_cast<ngraph::Node>(input) : fqOnData;
303 const std::shared_ptr<ngraph::Node> weights = ngraph::opset1::Constant::create(
305 ngraph::Shape{ outputChannelsCount, inputChannelsCount, 1, 1 },
306 weightsValues.size() == 1ul ?
307 std::vector<float>(outputChannelsCount * inputChannelsCount, weightsValues[0]) :
// Weights input of the conv: raw constant or FakeQuantize-wrapped constant.
310 const std::shared_ptr<ngraph::Node> parentOnWeights = fakeQuantizeOnWeights.empty() ?
312 ngraph::builder::makeFakeQuantize(
313 weights, precision, fakeQuantizeOnWeights.quantizationLevel, fakeQuantizeOnWeights.constantShape,
314 fakeQuantizeOnWeights.inputLowValues, fakeQuantizeOnWeights.inputHighValues,
315 fakeQuantizeOnWeights.outputLowValues, fakeQuantizeOnWeights.outputHighValues);
// Prototype Convolution with f32-coerced inputs, cloned as TypeRelaxed below.
317 auto convolutionOriginal = ngraph::opset1::Convolution(
318 ngraph::op::TemporaryReplaceOutputType(parentOnData, element::f32).get(),
319 ngraph::op::TemporaryReplaceOutputType(parentOnWeights, element::f32).get(),
320 ngraph::Strides{ 1, 1 },
321 ngraph::CoordinateDiff{ 0, 0 },
322 ngraph::CoordinateDiff{ 0, 0 },
323 ngraph::Strides{ 1, 1 });
325 const std::shared_ptr<ngraph::opset1::Convolution> convolution = std::make_shared<ngraph::op::TypeRelaxed<ngraph::opset1::Convolution>>(
327 std::vector<element::Type>{ element::f32, element::f32 },
328 std::vector<element::Type>{});
330 ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(convolution) };
331 return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "ConvolutionFunction");
334 } // namespace subgraph
335 } // namespace builder
336 } // namespace ngraph