Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH_IN);
Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
- int out_index[4];
- int in_index[4];
- in_index[0] = get_global_id(0);//W
- in_index[1] = get_global_id(1);//H
- in_index[2] = get_global_id(2) % DEPTH_IN;//C
- in_index[3] = get_global_id(2) / DEPTH_IN;//B
- out_index[0] = in_index[P1];
- out_index[1] = in_index[P2];
- out_index[2] = in_index[P3];
- out_index[3] = in_index[P4];
+ // WHCN format
+ int in_index[]={
+ get_global_id(0),
+ get_global_id(1),
+ get_global_id(2) % DEPTH_IN,
+ get_global_id(2) / DEPTH_IN,
+ };
- *((__global DATA_TYPE *)tensor4D_offset(&out, out_index[0],out_index[1],out_index[2],out_index[3])) = *((__global DATA_TYPE *)in.ptr);
+ // Output coordinates are the permuted input coordinates: out_index[i] = in_index[P(i+1)], i.e. (in_index[P1], in_index[P2], in_index[P3], in_index[P4])
+ *((__global DATA_TYPE *)
+ tensor4D_offset(&out, in_index[P1], in_index[P2], in_index[P3], in_index[P4])) = *((__global DATA_TYPE *)in.ptr);
}
#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(P1) && defined(P2) && defined(P3) && defined(P4)