Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / src / inference_engine / cpu_x86_sse42 / blob_transform_sse42.hpp
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #pragma once
6
7 #include <stdint.h>
8 #include <stdlib.h>
9
10 namespace InferenceEngine {
11
12 //------------------------------------------------------------------------
13 //
14 // Blob-copy primitives manually vectorized for SSE 4.2 (w/o OpenMP threads)
15 //
16 //------------------------------------------------------------------------
17
// Declaration only; the definition is not part of this header.
// NOTE(review): going by the name, this presumably copies a 4D blob of
// 3-channel uint8_t data while splitting the channels apart -- confirm
// against the implementation.
//
// Parameters, as declared:
//   src_ptr       pointer to const uint8_t source data
//   dst_ptr       pointer to non-const uint8_t destination data
//   N_src_stride  source stride, named for the N dimension
//   H_src_stride  source stride, named for the H dimension
//   N_dst_stride  destination stride, named for the N dimension
//   H_dst_stride  destination stride, named for the H dimension
//   C_dst_stride  destination stride, named for the C dimension
//                 (this signature has no matching C stride for the source)
//   N, H, W       int extents; presumably batch, height, width -- TODO confirm
//
// NOTE(review): stride units (elements vs. bytes) cannot be told from this
// declaration -- verify before calling.
18 void blob_copy_4d_split_u8c3(const uint8_t *src_ptr,
19                                    uint8_t *dst_ptr,
20                                     size_t  N_src_stride,
21                                     size_t  H_src_stride,
22                                     size_t  N_dst_stride,
23                                     size_t  H_dst_stride,
24                                     size_t  C_dst_stride,
25                                        int  N,
26                                        int  H,
27                                        int  W);
28
// Declaration only; the definition is not part of this header.
// NOTE(review): going by the name, this presumably copies a 4D blob of
// 3-channel float data while splitting the channels apart -- confirm
// against the implementation.
//
// Parameters, as declared:
//   src_ptr       pointer to const float source data
//   dst_ptr       pointer to non-const float destination data
//   N_src_stride  source stride, named for the N dimension
//   H_src_stride  source stride, named for the H dimension
//   N_dst_stride  destination stride, named for the N dimension
//   H_dst_stride  destination stride, named for the H dimension
//   C_dst_stride  destination stride, named for the C dimension
//                 (this signature has no matching C stride for the source)
//   N, H, W       int extents; presumably batch, height, width -- TODO confirm
//
// NOTE(review): stride units (elements vs. bytes) cannot be told from this
// declaration -- verify before calling.
29 void blob_copy_4d_split_f32c3(const float *src_ptr,
30                                     float *dst_ptr,
31                                    size_t  N_src_stride,
32                                    size_t  H_src_stride,
33                                    size_t  N_dst_stride,
34                                    size_t  H_dst_stride,
35                                    size_t  C_dst_stride,
36                                       int  N,
37                                       int  H,
38                                       int  W);
39
// Declaration only; the definition is not part of this header.
// NOTE(review): going by the name, this presumably copies a 4D blob of
// 3-channel uint8_t data while merging separate channels together --
// confirm against the implementation.
//
// Parameters, as declared:
//   src_ptr       pointer to const uint8_t source data
//   dst_ptr       pointer to non-const uint8_t destination data
//   N_src_stride  source stride, named for the N dimension
//   H_src_stride  source stride, named for the H dimension
//   C_src_stride  source stride, named for the C dimension
//                 (this signature has no matching C stride for the destination)
//   N_dst_stride  destination stride, named for the N dimension
//   H_dst_stride  destination stride, named for the H dimension
//   N, H, W       int extents; presumably batch, height, width -- TODO confirm
//
// NOTE(review): stride units (elements vs. bytes) cannot be told from this
// declaration -- verify before calling.
40 void blob_copy_4d_merge_u8c3(const uint8_t *src_ptr,
41                                    uint8_t *dst_ptr,
42                                     size_t  N_src_stride,
43                                     size_t  H_src_stride,
44                                     size_t  C_src_stride,
45                                     size_t  N_dst_stride,
46                                     size_t  H_dst_stride,
47                                        int  N,
48                                        int  H,
49                                        int  W);
50
// Declaration only; the definition is not part of this header.
// NOTE(review): going by the name, this presumably copies a 4D blob of
// 3-channel float data while merging separate channels together --
// confirm against the implementation.
//
// Parameters, as declared:
//   src_ptr       pointer to const float source data
//   dst_ptr       pointer to non-const float destination data
//   N_src_stride  source stride, named for the N dimension
//   H_src_stride  source stride, named for the H dimension
//   C_src_stride  source stride, named for the C dimension
//                 (this signature has no matching C stride for the destination)
//   N_dst_stride  destination stride, named for the N dimension
//   H_dst_stride  destination stride, named for the H dimension
//   N, H, W       int extents; presumably batch, height, width -- TODO confirm
//
// NOTE(review): stride units (elements vs. bytes) cannot be told from this
// declaration -- verify before calling.
51 void blob_copy_4d_merge_f32c3(const float *src_ptr,
52                                     float *dst_ptr,
53                                    size_t  N_src_stride,
54                                    size_t  H_src_stride,
55                                    size_t  C_src_stride,
56                                    size_t  N_dst_stride,
57                                    size_t  H_dst_stride,
58                                       int  N,
59                                       int  H,
60                                       int  W);
61
62 }  // namespace InferenceEngine