inference-engine/thirdparty/clDNN/tests/test_cases/binary_convolution_gpu_test.cpp

   1 /*
   2 // Copyright (c) 2019 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18
  19 #include <gtest/gtest.h>
  20 #include <api/CPP/memory.hpp>
  21 #include <api/CPP/input_layout.hpp>
  22 #include "api/CPP/binary_convolution.hpp"
  23 #include "api/CPP/reorder.hpp"
  24 #include <api/CPP/topology.hpp>
  25 #include <api/CPP/network.hpp>
  26 #include <api/CPP/engine.hpp>
  27 #include "test_utils/test_utils.h"
  28 #include <iostream>
  29 #include <api/CPP/data.hpp>
  30 #include <src/include/to_string_utils.h>
  31 #include "float16.h"
  32 #include "test_utils.h"
  33
  34 using namespace cldnn;
  35 using namespace tests;
  36
  37 // Batch, groups, IC, IW, IH, OC, OW, OH, KH, KW, SH, SW, PH, PW
  38 struct TestParams {
  39     int b;
  40     int g;
  41
  42     int ic;
  43     int ih;
  44     int iw;
  45
  46     int oc;
  47     int oh;
  48     int ow;
  49
  50     int kh;
  51     int kw;
  52
  53     int sh;
  54     int sw;
  55
  56     int ph;
  57     int pw;
  58
  59     float pad_value;
  60     data_types dt;
  61     std::string name;
  62
  63     bool isConsistent() const
  64     {
  65         bool res = true;
  66
  67         res &= (((iw - kw + 2*pw) / sw + 1) == ow);
  68         res &= (((ih - kh + 2*ph) / sh + 1) == oh);
  69         return res;
  70     }
  71
  72     friend ::std::ostream& operator<<(::std::ostream& os, const TestParams& p) {
  73         return os << "Params: [ b=" << p.b
  74                   << "; g=" << p.g
  75                   << "; src=[" << p.ic << "; " << p.ih << "; " << p.iw << "]"
  76                   << "; dst=[" << p.oc << "; " << p.oh << "; " << p.ow << "]"
  77                   << "; k=[" << p.kh << "; " << p.kw << "]"
  78                   << "; stride=[" << p.sh << "; " << p.sw << "]"
  79                   << "; pad=[" << p.ph << "; " << p.pw << "]"
  80                   << "; pad_value=" << p.pad_value
  81                   << "; name=" << p.name
  82                   << "]";
  83     }
  84     friend void PrintTo(const TestParams& p, ::std::ostream* os) {
  85         *os << p;
  86     }
  87 };
  88
  89 static void fill(cldnn::memory& mem) {
  90     auto ptr = mem.pointer<uint32_t>();
  91     for (size_t i = 0; i < div_up(mem.get_layout().count(), 32); i++) {
  92         ptr[i] = (uint32_t)rand() % (1 << 31);
  93     }
  94 }
  95
  96 template <typename data_t_src, typename data_t_wei,
  97           typename data_t_acc, typename data_t_dst>
  98 void compute_ref_conv_bin(const cldnn::memory &src,
  99                           const cldnn::memory &weights,
 100                           cldnn::memory &dst,
 101                           TestParams &p)
 102 {
 103     auto src_data     = src.pointer<data_t_src>();
 104     auto weights_data = weights.pointer<data_t_wei>();
 105     auto dst_data     = dst.pointer<data_t_dst>();
 106
 107     bool with_groups = p.g > 1;
 108     int pack_size = sizeof(data_t_src) * 8;
 109
 110     int B = p.b;
 111     int NG = p.g;
 112     int IC = p.ic;
 113     int IH = p.ih;
 114     int IW = p.iw;
 115
 116     int OC = p.oc;
 117     int OH = p.oh;
 118     int OW = p.ow;
 119
 120     int KH = p.kh;
 121     int KW = p.kw;
 122
 123     int SH = p.sh;
 124     int SW = p.sw;
 125
 126     int PH = p.ph;
 127     int PW = p.pw;
 128
 129
 130     auto extract_bit = [&](data_t_src val, data_t_src bit) -> data_t_src {
 131         return (data_t_src)((val >> bit) & 0x1);
 132     };
 133
 134     auto ker = [=](data_t_acc &d, int g, int mb, int oc,int oh, int ow, int& ks) {
 135         for (int ic = 0; ic < IC / NG; ++ic) {
 136             for (int kh = 0; kh < KH; ++kh)
 137                 for (int kw = 0; kw < KW; ++kw) {
 138                     const int ih = oh * SH - PH + kh;
 139                     const int iw = ow * SW - PW + kw;
 140
 141                     int widx =   g * OC / NG *IC / NG * KH * KW
 142                                  + oc * IC / NG * KH * KW
 143                                  + ic * KH * KW
 144                                  + kh * KW
 145                                  + kw;
 146                     int iidx = -1;
 147                     uint8_t w = extract_bit(weights_data[widx / pack_size], widx % pack_size);
 148                     uint8_t s = 0;
 149
 150                     if ((ih < 0 || ih >= IH || iw < 0 || iw >= IW))
 151                     {
 152                         if (p.pad_value == 0.0f)
 153                             continue;
 154                         else
 155                             s = (p.pad_value == -1.0f) ? 0 : 1;
 156                     }
 157                     else
 158                     {
 159                         if (ic == 0) ks++;
 160                         iidx = mb * div_up(IC, pack_size) * IH * IW
 161                                + g * div_up(IC, pack_size) / NG * IH * IW
 162                                + (ic/pack_size) * IH * IW
 163                                + ih * IW
 164                                + iw;
 165
 166                         s = extract_bit(src_data[iidx], ic % pack_size);
 167                     }
 168                     d += (data_t_acc)(s ^ w);
 169                 }
 170         }
 171     };
 172
 173     for (int g = 0; g < NG; g++) {
 174         for (int b = 0; b < B; b++) {
 175             for (int oc = 0; oc < OC / NG; oc++) {
 176                 for (int oh = 0; oh < OH; oh++) {
 177                     for (int ow = 0; ow < OW; ow++) {
 178                         data_t_acc a = 0;
 179                         int ks = 0;
 180                         ker(a, g, b, oc, oh, ow, ks);
 181                         int dst_off = b * OC * OH* OW
 182                                       + g * OC / NG * OH * OW
 183                                       + oc * OH * OW
 184                                       + oh * OW
 185                                       + ow;
 186                         if (p.pad_value == 0.0f)
 187                             dst_data[dst_off] =(data_t_dst)(IC*ks - 2*a);
 188                         else
 189                             dst_data[dst_off] = (data_t_dst)(IC*KH*KW - 2*a);
 190                     }
 191                 }
 192             }
 193         }
 194     }
 195 }
 196
 197 class binary_convolution_test : public ::testing::TestWithParam<TestParams>
 198 {
 199     void SetUp()
 200     {
 201         std::cout << GetParam() << std::endl;
 202         ASSERT_TRUE(GetParam().isConsistent());
 203     }
 204 };
 205
 206 TEST_P(binary_convolution_test, conv)
 207 {
 208     const auto& engine = get_test_engine();
 209     cldnn::build_options options;
 210     options.set_option(cldnn::build_option::optimize_data(true));
 211     topology topology_bin;
 212
 213     std::string weights_suffix = "_w_";
 214
 215     std::string input_name = "input";
 216     std::string output_name = "conv";
 217
 218     TestParams p = GetParam();
 219
 220     cldnn::tensor stride = cldnn::tensor{cldnn::batch(1), cldnn::feature(1), cldnn::spatial(p.sw, p.sh)};
 221     cldnn::tensor pad = cldnn::tensor{cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-p.pw, -p.ph)};
 222     cldnn::tensor dilation = {1,1,1,1};
 223
 224     cldnn::tensor is_size{ cldnn::batch(p.b),
 225                            cldnn::feature(p.ic),
 226                            cldnn::spatial(p.iw, p.ih) };
 227     cldnn::tensor wei_size{ cldnn::batch(p.oc),
 228                             cldnn::feature(p.ic),
 229                             cldnn::spatial(p.kw, p.kh) };
 230     cldnn::tensor os_size{ cldnn::batch(p.b),
 231                             cldnn::feature(p.oc),
 232                             cldnn::spatial(p.ow, p.oh)};
 233
 234     auto input       = memory::allocate(engine, { cldnn::data_types::bin, cldnn::format::b_fs_yx_32fp, is_size });
 235     auto weights     = memory::allocate(engine, { cldnn::data_types::bin, cldnn::format::bfyx, wei_size });
 236     auto output_ref  = memory::allocate(engine, { cldnn::data_types::f32, cldnn::format::bfyx, os_size });
 237
 238     fill(input);
 239     fill(weights);
 240
 241     compute_ref_conv_bin<uint32_t, uint32_t, int32_t, float>(input, weights, output_ref, p);
 242
 243 //    print_bin_blob(input,"input");
 244 //    print_bin_blob_packed(input,"input");
 245 //    print_bin_blob(weights, "weights");
 246 //    print_blob(output_ref, "ref_out");
 247
 248     topology_bin.add(input_layout(input_name, input.get_layout()));
 249     topology_bin.add(data(output_name + weights_suffix, weights));
 250
 251     topology_bin.add(binary_convolution(output_name, input_name, {output_name + weights_suffix},
 252                                         stride, pad, dilation, os_size, 1, p.pad_value, p.dt));
 253
 254     network network_bin(engine, topology_bin, options);
 255     network_bin.set_input_data(input_name, input);
 256
 257     std::map<primitive_id, network_output> outputs = network_bin.execute();
 258     auto outputMemory = outputs.at(output_name).get_memory();
 259
 260
 261     for (size_t i = 0; i < output_ref.count(); i++) {
 262         if (p.dt == data_types::f32)
 263         {
 264             auto ref = output_ref.pointer<float>();
 265             auto opt = outputMemory.pointer<float>();
 266
 267             ASSERT_EQ(ref[i], opt[i]) << i;
 268         }
 269         else if (p.dt == data_types::f16)
 270         {
 271             auto ref = output_ref.pointer<float>();
 272             auto opt = outputMemory.pointer<uint16_t>();
 273
 274             ASSERT_EQ(ref[i], float16_to_float32(opt[i])) << i;
 275         }
 276     }
 277 }
 278
 279 // Batch, groups, IC, IW, IH, OC, OW, OH, KH, KW, SH, SW, PH, PW
 280 INSTANTIATE_TEST_CASE_P(BinaryConvTest, binary_convolution_test, ::testing::Values(
 281         TestParams{1, 1,  16,2,2,   4,2,2, 3,3, 1,1, 1,1, -1.0f, data_types::f32, "small"},
 282         TestParams{1, 1,  17,2,2,   4,2,2, 3,3, 1,1, 1,1, -1.0f, data_types::f32, "small"},
 283         TestParams{1, 1,  17,2,2,   4,2,2, 3,3, 1,1, 1,1,  0.0f, data_types::f32, "small"},
 284         TestParams{1, 1,  17,2,2,   4,2,2, 3,3, 1,1, 1,1,  1.0f, data_types::f32, "small"},
 285         TestParams{1, 1,  16,2,2,  16,2,2, 3,3, 1,1, 1,1,  1.0f, data_types::f32, "small"},
 286         TestParams{1, 1,  32,2,2,  32,2,2, 3,3, 1,1, 1,1,  1.0f, data_types::f32, "small"},
 287         TestParams{1, 1,  32,2,2,  32,2,2, 1,1, 1,1, 0,0,  1.0f, data_types::f32, "small"},
 288         TestParams{1, 1, 128,2,2, 128,2,2, 1,1, 1,1, 0,0, -1.0f, data_types::f32, "small"},
 289         TestParams{1, 1,  16,4,3,   4,4,3, 1,1, 1,1, 0,0, -1.0f, data_types::f32, "small"},
 290         TestParams{1, 1,  16,2,2,   4,2,2, 3,3, 1,1, 1,1, -1.0f, data_types::f16, "small"},
 291         TestParams{1, 1,  17,2,2,   4,2,2, 3,3, 1,1, 1,1, -1.0f, data_types::f16, "small"},
 292         TestParams{1, 1,  17,2,2,   4,2,2, 3,3, 1,1, 1,1,  0.0f, data_types::f16, "small"},
 293         TestParams{1, 1,  17,2,2,   4,2,2, 3,3, 1,1, 1,1,  1.0f, data_types::f16, "small"},
 294         TestParams{1, 1,  16,2,2,  16,2,2, 3,3, 1,1, 1,1,  1.0f, data_types::f16, "small"},
 295         TestParams{1, 1,  32,2,2,  32,2,2, 3,3, 1,1, 1,1,  1.0f, data_types::f16, "small"},
 296         TestParams{1, 1,  32,2,2,  32,2,2, 1,1, 1,1, 0,0,  1.0f, data_types::f16, "small"},
 297         TestParams{1, 1, 128,2,2, 128,2,2, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "small"},
 298         TestParams{1, 1,  16,4,3,   4,4,3, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "small"},
 299         TestParams{1, 1,  9,16,32, 17,8,16, 7,7, 2,2, 3,3, -1.0f, data_types::f16, "small"},
 300         TestParams{1, 1,  9,16,32, 17,8,16, 7,7, 2,2, 3,3, 1.0f, data_types::f16, "small"},
 301
 302         // Resnet-18 3x3
 303         TestParams{1, 1,  64,56,56,  64,56,56, 3,3, 1,1, 1,1, -1.0f, data_types::f16, "resnet18_0"},
 304         TestParams{1, 1,  64,56,56, 128,28,28, 3,3, 2,2, 1,1, -1.0f, data_types::f16, "resnet18_1"},
 305         TestParams{1, 1, 128,28,28, 128,28,28, 3,3, 1,1, 1,1, -1.0f, data_types::f16, "resnet18_2"},
 306         TestParams{1, 1, 128,28,28, 256,14,14, 3,3, 2,2, 1,1, -1.0f, data_types::f16, "resnet18_3"},
 307         TestParams{1, 1, 256,14,14, 256,14,14, 3,3, 1,1, 1,1, -1.0f, data_types::f16, "resnet18_4"},
 308         TestParams{1, 1, 256,14,14, 512, 7, 7, 3,3, 2,2, 1,1, -1.0f, data_types::f16, "resnet18_5"},
 309         TestParams{1, 1, 512, 7, 7, 512, 7, 7, 3,3, 1,1, 1,1, -1.0f, data_types::f16, "resnet18_6"},
 310         // Resnet-50
 311         TestParams{1, 1, 64,56,56, 64,56,56, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_0"},
 312         TestParams{1, 1, 64,56,56, 256,56,56, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_1"},
 313         TestParams{1, 1, 256,56,56, 128,28,28, 1,1, 2,2, 0,0, -1.0f, data_types::f16, "resnet50_2"},
 314         TestParams{1, 1, 128,28,28, 512,28,28, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_3"},
 315         TestParams{1, 1, 512,28,28, 128,28,28, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_4"},
 316         TestParams{1, 1, 512,28,28, 256,14,14, 1,1, 2,2, 0,0, -1.0f, data_types::f16, "resnet50_5"},
 317         TestParams{1, 1, 256,14,14, 1024,14,14, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_6"},
 318         TestParams{1, 1, 1024,14,14, 256,14,14, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_7"},
 319         TestParams{1, 1, 1024,14,14, 512,7,7, 1,1, 2,2, 0,0, -1.0f, data_types::f16, "resnet50_8"},
 320         TestParams{1, 1, 512,7,7, 2048,7,7, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_9"},
 321         TestParams{1, 1, 2048,7,7, 512,7,7, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "resnet50_10"},
 322         // Mobilenet-ssd-vd
 323         TestParams{1, 1,  56,96,168, 112,96,168, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv2_2_sep_BIN"}, // back_bone_seq_conv2_2_sep_BIN
 324         TestParams{1, 1, 112,96,168, 112,96,168, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv3_1_sep_BIN"}, // back_bone_seq_conv3_1_sep_BIN
 325         TestParams{1, 1,  112,48,84, 208,48, 84, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv3_2_sep_BIN"}, // back_bone_seq_conv3_2_sep_BIN
 326         TestParams{1, 1,  208,48,84, 216,48, 84, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv4_1_sep_BIN"}, // back_bone_seq_conv4_1_sep_BIN
 327         TestParams{1, 1,  216,24,42, 328,24, 42, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv4_2_sep_BIN"}, // back_bone_seq_conv4_2_sep_BIN
 328         TestParams{1, 1,  328,24,42, 288,24, 42, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv5_1_sep_BIN"}, // back_bone_seq_conv5_1_sep_BIN
 329         TestParams{1, 1,  288,24,42, 288,24, 42, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv5_2_sep_BIN"}, // back_bone_seq_conv5_2_sep_BIN
 330         TestParams{1, 1,  288,24,42, 240,24, 42, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv5_3_sep_BIN"}, // back_bone_seq_conv5_3_sep_BIN
 331         TestParams{1, 1,  240,24,42, 264,24, 42, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv5_4_sep_BIN"}, // back_bone_seq_conv5_4_sep_BIN
 332         TestParams{1, 1,  264,24,42, 192,24, 42, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv5_5_sep_BIN"}, // back_bone_seq_conv5_5_sep_BIN
 333         TestParams{1, 1,  192,12,21, 208,12, 21, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv5_6_sep_BIN"}, // back_bone_seq_conv5_6_sep_BIN
 334         TestParams{1, 1,  208,12,21,  88,12, 21, 1,1, 1,1, 0,0, -1.0f, data_types::f16, "conv6_sep_BN"} // back_bone_seq_conv6_sep_BN
 335 ),);
 336
 337 template <typename T>
 338 static void set_binary_values(const cldnn::memory& mem, std::vector<T> args) {
 339     auto ptr = mem.pointer<T>();
 340
 341     auto it = ptr.begin();
 342     for (auto x : args)
 343         *it++ = x;
 344 }
 345
 346 TEST(binary_convolution, basic_convolution_1x1_single_packed_channel)
 347 {
 348     const auto& engine = get_test_engine();
 349
 350     auto input = memory::allocate(engine, { data_types::bin, format::b_fs_yx_32fp, { 1, 16, 2, 2 } });
 351     auto weights = memory::allocate(engine, { data_types::bin, format::bfyx, { 4, 16, 1, 1 } });
 352
 353     // 0 0 1 0  0 1 0 0  1 0 1 0  1 0 1 0
 354     // 1 0 0 0  0 1 1 0  0 1 1 0  1 0 1 0
 355     // 1 1 0 0  1 0 1 1  1 1 1 1  1 0 1 0
 356     // 0 0 0 0  0 0 0 0  0 0 0 0  0 0 0 1
 357     set_binary_values<uint32_t>(input, { 21796, 22113, 24531, 32768 });
 358
 359     // 1 1 1 1  1 1 1 1  1 1 1 1  1 1 1 1
 360     // 0 1 0 1  0 1 0 1  1 0 1 0  1 0 1 0
 361     // 1 0 1 0  1 0 1 0  0 1 0 1  0 1 0 1
 362     // 0 0 0 0  0 0 0 0  0 0 0 0  0 0 0 0
 363     set_binary_values<uint16_t>(weights, { 65535, 21930, 43605, 0 });
 364
 365     // 16 - 2*popcount(1 1 0 1  1 0 1 1  0 1 0 1  0 1 0 1) = -4
 366     // 16 - 2*popcount(0 1 1 1  1 0 0 1  1 0 0 1  0 1 0 1) = -2
 367     // 16 - 2*popcount(0 0 1 1  0 1 0 0  0 0 0 0  0 1 0 1) = 6
 368     // 16 - 2*popcount(1 1 1 1  1 1 1 1  1 1 1 1  1 1 1 0) = -14
 369
 370     // 16 - 2*popcount(0 1 1 1  0 0 0 1  0 0 0 0  0 0 0 0) = 8
 371     // 16 - 2*popcount(1 1 0 1  0 0 1 1  1 1 0 0  0 0 0 0) = 2
 372     // 16 - 2*popcount(1 0 0 1  1 1 1 0  0 1 0 1  0 0 0 0) = 2
 373     // 16 - 2*popcount(0 1 0 1  0 1 0 1  1 0 1 0  1 0 1 1) = -2
 374
 375     // 16 - 2*popcount(1 0 0 0  1 1 1 0  1 1 1 1  1 1 1 1) = -8
 376     // 16 - 2*popcount(0 0 1 0  1 1 0 0  0 0 1 1  1 1 1 1) = -2
 377     // 16 - 2*popcount(0 1 1 0  0 0 0 1  1 0 1 0  1 1 1 1) = -2
 378     // 16 - 2*popcount(1 0 1 0  1 0 1 0  0 1 0 1  0 1 0 0) = 2
 379
 380     // 16 - 2*popcount(0 0 1 0  0 1 0 0  1 0 1 0  1 0 1 0) = 4
 381     // 16 - 2*popcount(1 0 0 0  0 1 1 0  0 1 1 0  1 0 1 0) = 2
 382     // 16 - 2*popcount(1 1 0 0  1 0 1 1  1 1 1 1  1 0 1 0) = -6
 383     // 16 - 2*popcount(0 0 0 0  0 0 0 0  0 0 0 0  0 0 0 1) = 14
 384     VF<float> output_vec = {
 385             -4.0f, -2.0f,  6.0f, -14.0f,
 386              8.0f,  2.0f,  2.0f,  -2.0f,
 387             -8.0f, -2.0f, -2.0f,   2.0f,
 388              4.0f,  2.0f, -6.0f,  14.0f };
 389
 390     topology topology(
 391             input_layout("input", input.get_layout()),
 392             data("weights", weights),
 393             binary_convolution("binary_conv", "input", { "weights" },
 394                                { 1,1,1,1 },
 395                                { 0,0,0,0 },
 396                                { 1,1,1,1 },
 397                                { 1,4,2,2 },
 398                                0, 0.0f,
 399                                data_types::f32,
 400                                padding{ { 0,0,0,0 }, 0 })
 401     );
 402
 403     cldnn::build_options options;
 404     options.set_option(cldnn::build_option::optimize_data(true));
 405
 406     network network(engine, topology, options);
 407     network.set_input_data("input", input);
 408
 409     auto outputs = network.execute();
 410     EXPECT_EQ(outputs.size(), size_t(1));
 411     EXPECT_EQ(outputs.begin()->first, "binary_conv");
 412
 413     auto output_memory = outputs.at("binary_conv").get_memory();
 414     auto output_layout = output_memory.get_layout();
 415     auto output_ptr = output_memory.pointer<float>();
 416
 417
 418     EXPECT_EQ(output_layout.format, format::bfyx);
 419     EXPECT_EQ(output_layout.data_type, data_types::f32);
 420     EXPECT_EQ(output_layout.size.batch[0], 1);
 421     EXPECT_EQ(output_layout.size.feature[0], 4);
 422     EXPECT_EQ(output_layout.size.spatial[1], 2);
 423     EXPECT_EQ(output_layout.size.spatial[0], 2);
 424
 425     for (size_t i = 0; i < output_layout.count(); i++)
 426     {
 427         EXPECT_EQ(output_ptr[i], output_vec[i]) << "index="<< i;
 428     }
 429 }
 430
 431
 432 TEST(binary_convolution, basic_convolution_1x1_single_packed_channel_fp16) {
 433     const auto& engine = get_test_engine();
 434
 435     auto input = memory::allocate(engine, { data_types::bin, format::b_fs_yx_32fp, { 1, 16, 2, 2 } });
 436     auto weights = memory::allocate(engine, { data_types::bin, format::bfyx, { 4, 16, 1, 1 } });
 437
 438     // 0 0 1 0  0 1 0 0  1 0 1 0  1 0 1 0
 439     // 1 0 0 0  0 1 1 0  0 1 1 0  1 0 1 0
 440     // 1 1 0 0  1 0 1 1  1 1 1 1  1 0 1 0
 441     // 0 0 0 0  0 0 0 0  0 0 0 0  0 0 0 1
 442     set_binary_values<uint32_t>(input, { 21796, 22113, 24531, 32768 });
 443
 444     // 1 1 1 1  1 1 1 1  1 1 1 1  1 1 1 1
 445     // 0 1 0 1  0 1 0 1  1 0 1 0  1 0 1 0
 446     // 1 0 1 0  1 0 1 0  0 1 0 1  0 1 0 1
 447     // 0 0 0 0  0 0 0 0  0 0 0 0  0 0 0 0
 448     set_binary_values<uint16_t>(weights, { 65535, 21930, 43605, 0 });
 449
 450     // 16 - 2*popcount(1 1 0 1  1 0 1 1  0 1 0 1  0 1 0 1) = -4
 451     // 16 - 2*popcount(0 1 1 1  1 0 0 1  1 0 0 1  0 1 0 1) = -2
 452     // 16 - 2*popcount(0 0 1 1  0 1 0 0  0 0 0 0  0 1 0 1) = 6
 453     // 16 - 2*popcount(1 1 1 1  1 1 1 1  1 1 1 1  1 1 1 0) = -14
 454
 455     // 16 - 2*popcount(0 1 1 1  0 0 0 1  0 0 0 0  0 0 0 0) = 8
 456     // 16 - 2*popcount(1 1 0 1  0 0 1 1  1 1 0 0  0 0 0 0) = 2
 457     // 16 - 2*popcount(1 0 0 1  1 1 1 0  0 1 0 1  0 0 0 0) = 2
 458     // 16 - 2*popcount(0 1 0 1  0 1 0 1  1 0 1 0  1 0 1 1) = -2
 459
 460     // 16 - 2*popcount(1 0 0 0  1 1 1 0  1 1 1 1  1 1 1 1) = -8
 461     // 16 - 2*popcount(0 0 1 0  1 1 0 0  0 0 1 1  1 1 1 1) = -2
 462     // 16 - 2*popcount(0 1 1 0  0 0 0 1  1 0 1 0  1 1 1 1) = -2
 463     // 16 - 2*popcount(1 0 1 0  1 0 1 0  0 1 0 1  0 1 0 0) = 2
 464
 465     // 16 - 2*popcount(0 0 1 0  0 1 0 0  1 0 1 0  1 0 1 0) = 4
 466     // 16 - 2*popcount(1 0 0 0  0 1 1 0  0 1 1 0  1 0 1 0) = 2
 467     // 16 - 2*popcount(1 1 0 0  1 0 1 1  1 1 1 1  1 0 1 0) = -6
 468     // 16 - 2*popcount(0 0 0 0  0 0 0 0  0 0 0 0  0 0 0 1) = 14
 469     VF<float> output_vec = {
 470             -4.0f, -2.0f,  6.0f, -14.0f,
 471              8.0f,  2.0f,  2.0f,  -2.0f,
 472             -8.0f, -2.0f, -2.0f,   2.0f,
 473              4.0f,  2.0f, -6.0f,  14.0f };
 474
 475     topology topology(
 476             input_layout("input", input.get_layout()),
 477             data("weights", weights),
 478             binary_convolution("binary_conv", "input", { "weights" },
 479                                { 1,1,1,1 },
 480                                { 0,0,0,0 },
 481                                { 1,1,1,1 },
 482                                { 1,4,2,2 },
 483                                0, 0.0f,
 484                                data_types::f16,
 485                                padding{ { 0,0,0,0 }, 0 })
 486     );
 487
 488     cldnn::build_options options;
 489     options.set_option(cldnn::build_option::optimize_data(true));
 490
 491     network network(engine, topology, options);
 492     network.set_input_data("input", input);
 493
 494     auto outputs = network.execute();
 495     EXPECT_EQ(outputs.size(), size_t(1));
 496     EXPECT_EQ(outputs.begin()->first, "binary_conv");
 497
 498     auto output_memory = outputs.at("binary_conv").get_memory();
 499     auto output_layout = output_memory.get_layout();
 500     auto output_ptr = output_memory.pointer<uint16_t>();
 501
 502
 503     EXPECT_EQ(output_layout.format, format::bfyx);
 504     EXPECT_EQ(output_layout.data_type, data_types::f16);
 505     EXPECT_EQ(output_layout.size.batch[0], 1);
 506     EXPECT_EQ(output_layout.size.feature[0], 4);
 507     EXPECT_EQ(output_layout.size.spatial[1], 2);
 508     EXPECT_EQ(output_layout.size.spatial[0], 2);
 509
 510     for (size_t i = 0; i < output_layout.count(); i++)
 511     {
 512         EXPECT_EQ(float16_to_float32(output_ptr[i]), output_vec[i]) << "index="<< i;
 513     }
 514 }
 515