#include "include/include_all.cl"
-#if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM
+#ifdef INPUT_STRIDED
+
+#define GET_INDEX(prefix, num) \
+ CAT(CAT(prefix, num), _OFFSET) + \
+ ((d1 * CAT(CAT(prefix, num), _STRIDE_X)) % CAT(CAT(prefix, num), _SIZE_X))*CAT(CAT(prefix, num), _X_PITCH) +\
+ ((d2 * CAT(CAT(prefix, num), _STRIDE_Y)) % CAT(CAT(prefix, num), _SIZE_Y))*CAT(CAT(prefix, num), _Y_PITCH) +\
+ (d3 % CAT(CAT(prefix, num), _FEATURE_NUM))*CAT(CAT(prefix, num), _FEATURE_PITCH) + \
+ (d4 % CAT(CAT(prefix, num), _BATCH_NUM ))*CAT(CAT(prefix, num), _BATCH_PITCH)
+
+#else
+
+#if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM || ELTWISE_BROADCAST
#define GET_INDEX(prefix, num) \
CAT(CAT(prefix, num), _OFFSET) + \
- (d1 % CAT(CAT(prefix, num), _SIZE_X ))*CAT(CAT(prefix, num), _X_PITCH) + \
- (d2 % CAT(CAT(prefix, num), _SIZE_Y ))*CAT(CAT(prefix, num), _Y_PITCH) + \
- (d3 % CAT(CAT(prefix, num), _FEATURE_NUM))*CAT(CAT(prefix, num), _FEATURE_PITCH) + \
+ (d1 % CAT(CAT(prefix, num), _SIZE_X ))*CAT(CAT(prefix, num), _X_PITCH) + \
+ (d2 % CAT(CAT(prefix, num), _SIZE_Y ))*CAT(CAT(prefix, num), _Y_PITCH) + \
+ (d3 % CAT(CAT(prefix, num), _FEATURE_NUM))*CAT(CAT(prefix, num), _FEATURE_PITCH) + \
(d4 % CAT(CAT(prefix, num), _BATCH_NUM ))*CAT(CAT(prefix, num), _BATCH_PITCH)
#elif ELTWISE_NO_PITCH_SAME_DIMS
#endif
+#endif
+
+
KERNEL(eltwise)(
INPUTS_DECLS
__global UNIT_TYPE* output
#endif
)
{
-#if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM
- const uint d1 = get_global_id(GWS_YX) % INPUT0_SIZE_X; // X
- const uint d2 = get_global_id(GWS_YX) / INPUT0_SIZE_X; // Y
+#if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM || ELTWISE_BROADCAST
+ const uint d1 = get_global_id(GWS_YX) % OUTPUT_SIZE_X; // X
+ const uint d2 = get_global_id(GWS_YX) / OUTPUT_SIZE_X; // Y
const uint d3 = get_global_id(GWS_FEATURE); // Feature
const uint d4 = get_global_id(GWS_BATCH); // Batch
const uint d2 = get_global_id(1);
const uint d3 = get_global_id(2) % OUTPUT_SIZES[2];
const uint d4 = get_global_id(2) / OUTPUT_SIZES[2];
-
+
uint output_offset = OUTPUT_OFFSET +
d1*OUTPUT_PITCHES[0] +
d2*OUTPUT_PITCHES[1] +
#else
UNIT_TYPE res;
#endif
-
+
DO_ELTWISE;
#if QUANTIZATION_TERM