2 // Copyright (c) 2018 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
19 #include "program_helpers.h"
20 #include "program_impl.h"
21 #include "data_inst.h"
25 //helper function for merging the weights/biases buffers on cpu side for depthwise separable convolution optimization
26 void program_helpers::merge_buffers(engine_impl &engine, program_node &node, layout target_layout, size_t begin_offset, size_t end_offset)
28 memory_impl::ptr data_to_allocate = engine.allocate_memory(target_layout);
30 for (size_t i = begin_offset; i < end_offset; i++)
32 auto& weights = node.get_dependency(i).as<data>();
33 mem_lock<char> src{ weights.get_attached_memory() };
34 mem_lock<char> dst{ data_to_allocate };
35 std::copy(src.begin(), src.end(), dst.begin() + (i - begin_offset)*src.size());
38 for (size_t i = 0; i < end_offset - begin_offset - 1; i++)
39 node.remove_dependency(begin_offset + 1);
41 auto& data_node = node.get_dependency(begin_offset).as<data>();
42 data_node.attach_memory(*data_to_allocate, false);
45 //helper function for getting target layout used in depthwise sep optimization
46 layout program_helpers::get_weights_layout(typed_program_node<cldnn::data> &data_node, int32_t split)
48 auto mem_layout = data_node.get_output_layout();
50 return layout(mem_layout.data_type, mem_layout.format, { split * mem_layout.size.batch[0], mem_layout.size.feature[0], mem_layout.size.spatial[0], mem_layout.size.spatial[1] });
53 // pair.first tells whether l1 and l2 are absolutely identical
54 // pair.second tells whether l1 and l2 can be reinterpreted to each other without need of reordering
55 // note: layouts can only be considered identical if data size described by both layouts match (so no data are genereted nor dropped)
56 // note: if layouts describe two buffers with different size, consider them not to be identical even if smaller buffer can be considered to hold subsequence of larger buffer,
57 // this behavior is required to force buffer allocation for smaller buffer which, currently, should always be performed
58 std::pair<bool, bool> program_helpers::are_layouts_identical(layout const& l1, layout const& l2)
62 if (l1.data_type != l2.data_type)
63 return{ false, false };
64 if (l1.size != l2.size)
65 return{ false, false };
66 if (l1.get_linear_size() != l2.get_linear_size())
67 return{ false, false };
68 if ((l1.format == format::bf8_xy16 && l2.format != format::bf8_xy16) ||
69 (l2.format == format::bf8_xy16 && l1.format != format::bf8_xy16) ||
70 (l1.format == format::b_fs_yx_fsv4 && l2.format != format::b_fs_yx_fsv4) ||
71 (l2.format == format::b_fs_yx_fsv4 && l1.format != format::b_fs_yx_fsv4))
72 return{ false, false };
74 auto l1_pitch = l1.get_pitches();
75 auto l2_pitch = l2.get_pitches();
77 //ignore pitches which will never be used (for dims with size == 1)
78 for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; ++i)
79 if (l1.size.raw[i] == 1)
81 for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; ++i)
82 if (l2.size.raw[i] == 1)
85 auto l1_offset = l1.get_linear_offset();
86 auto l2_offset = l2.get_linear_offset();
87 if (l1_pitch == l2_pitch && l1_offset == l2_offset)
88 return{ false, true };
90 return{ false, false };