arm_compute v18.05
[platform/upstream/armcl.git] / src / core / GLES_COMPUTE / cs_shaders / direct_convolution3x3.cs
index 855d450..e51cc37 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,10 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z =
 
 #include "helpers_cs.h"
 
+#ifdef FUSED_ACTIVATION
+#include "activation_layer_helpers_cs.h"
+#endif /* FUSED_ACTIVATION */
+
 #if defined(DATA_TYPE_FP16)
 precision mediump float;
 #endif // DATA_TYPE_FP16
@@ -114,6 +118,10 @@ void main()
     pixels += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, z_index));
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
     STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
 }
 
@@ -238,6 +246,11 @@ void main()
     pixels[1] += vec4(b);
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels[0] = ACT_OP(pixels[0]);
+    pixels[1] = ACT_OP(pixels[1]);
+#endif /* FUSED_ACTIVATION */
+
     VSTORE2_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
 }
 
@@ -335,6 +348,10 @@ void main()
     pixels += b;
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
     STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels);
 }
 
@@ -434,6 +451,12 @@ void main()
     pixels[2] += vec4(b);
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels[0] = ACT_OP(pixels[0]);
+    pixels[1] = ACT_OP(pixels[1]);
+    pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
     STORE_CURRENT_ITEM(dst_ptr, dst_iter, pixels[0]);
     STORE(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
     STORE(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -601,6 +624,12 @@ void main()
     }
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels[0] = ACT_OP(pixels[0]);
+    pixels[1] = ACT_OP(pixels[1]);
+    pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
     STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
     STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
     STORE_PACK8_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -728,6 +757,10 @@ void main()
     pixels += vec4(b);
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels = ACT_OP(pixels);
+#endif /* FUSED_ACTIVATION */
+
     STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels);
 }
 
@@ -841,6 +874,12 @@ void main()
     }
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels[0] = ACT_OP(pixels[0]);
+    pixels[1] = ACT_OP(pixels[1]);
+    pixels[2] = ACT_OP(pixels[2]);
+#endif /* FUSED_ACTIVATION */
+
     STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
     STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
     STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -962,6 +1001,13 @@ void main()
     }
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+    pixels[0] = ACT_OP(pixels[0]);
+    pixels[1] = ACT_OP(pixels[1]);
+    pixels[2] = ACT_OP(pixels[2]);
+    pixels[3] = ACT_OP(pixels[3]);
+#endif /* FUSED_ACTIVATION */
+
     STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
     STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
     STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);
@@ -1087,6 +1133,13 @@ void main()
         }
 #endif /* BIAS */
 
+#ifdef FUSED_ACTIVATION
+        pixels[0] = ACT_OP(pixels[0]);
+        pixels[1] = ACT_OP(pixels[1]);
+        pixels[2] = ACT_OP(pixels[2]);
+        pixels[3] = ACT_OP(pixels[3]);
+#endif /* FUSED_ACTIVATION */
+
         STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, pixels[0]);
         STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 1, 0), pixels[1]);
         STORE_PACK4_HALF(dst_ptr, TENSOR3D_OFFSET(dst_iter, 0, 2, 0), pixels[2]);