#include "common.h"
#ifdef FUNCTION_PROFILE
#include "functable.h"
-#endif
+#endif
#if defined(Z13)
#define MULTI_THREAD_MINIMAL 200000
#else
-#define MULTI_THREAD_MINIMAL 10000
+#define MULTI_THREAD_MINIMAL 10000
#endif
#ifndef CBLAS
if (incy < 0) y -= (n - 1) * incy;
#ifdef SMP
- nthreads = num_cpu_avail(1);
-
//disable multi-thread when incx==0 or incy==0
//In that case, the threads would be dependent.
- if (incx == 0 || incy == 0)
- nthreads = 1;
-
+ //
//Temporarily work-around the low performance issue with small imput size &
//multithreads.
- if (n <= MULTI_THREAD_MINIMAL)
+ if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
#endif
#ifdef SMP
- nthreads = num_cpu_avail(1);
-
if (n <= 1048576 )
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
+
if (nthreads == 1) {
#endif
if (incy < 0) y -= (n - 1) * incy * 2;
#ifdef SMP
- nthreads = num_cpu_avail(1);
-
//disable multi-thread when incx==0 or incy==0
//In that case, the threads would be dependent.
- if (incx == 0 || incy == 0)
- nthreads = 1;
-
- //Work around the low performance issue with small imput size &
+ //
+ //Temporarily work around the low performance issue with small input size &
//multithreads.
- if (n <= MULTI_THREAD_MINIMAL) {
+ if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
nthreads = 1;
- }
+ else
+ nthreads = num_cpu_avail(1);
+
if (nthreads == 1) {
#endif
FUNCTION_PROFILE_START();
#ifdef SMP
- nthreads = num_cpu_avail(1);
-
if ( n <= 1048576 )
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
#endif
if (incy < 0) y -= (n - 1) * incy * 2;
#ifdef SMP
- nthreads = num_cpu_avail(1);
-
//disable multi-thread when incx==0 or incy==0
//In that case, the threads would be dependent.
if (incx == 0 || incy == 0)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
#endif
FLOAT asum = 0.0;
#if defined(SMP)
- nthreads = num_cpu_avail(1);
-
- if (inc_x == 0)
- nthreads = 1;
-
- if (n <= 10000)
+ if (inc_x == 0 || n <= 10000)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
asum = casum_compute(n, x, inc_x);
if (n <= 0) return 0;
#if defined(SMP)
- nthreads = num_cpu_avail(1);
-
- if (inc_x == 0)
- nthreads = 1;
-
- if (n <= 10000)
+ if (inc_x == 0 || n <= 10000)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
do_copy(n, x, inc_x, y, inc_y);
FLOAT asum = 0.0;
#if defined(SMP)
- nthreads = num_cpu_avail(1);
-
- if (inc_x == 0)
- nthreads = 1;
-
- if (n <= 10000)
+ if (inc_x == 0 || n <= 10000)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
asum = dasum_compute(n, x, inc_x);
" faddp "DOTF", v0.2d \n"
#endif /* !defined(DSDOT) */
-#else /* !defined(DOUBLE) */
+#else /* !defined(DOUBLE) */
#define KERNEL_F1 \
" ldr "TMPX", ["X"] \n" \
" ldr "TMPY", ["Y"] \n" \
RETURN_TYPE dot = 0.0;
#if defined(SMP)
- nthreads = num_cpu_avail(1);
-
- if (inc_x == 0 || inc_y == 0)
- nthreads = 1;
-
- if (n <= 10000)
+ if (inc_x == 0 || inc_y == 0 || n <= 10000)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
dot = dot_compute(n, x, inc_x, y, inc_y);
if (n <= 0 || inc_x <= 0) return 0.0;
#if defined(SMP)
- nthreads = num_cpu_avail(1);
-
if (n <= 10000)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
nrm2_compute(n, x, inc_x, &ssq, &scale);
if (n <= 0 || inc_x <= 0) return 0.0;
#if defined(SMP)
- nthreads = num_cpu_avail(1);
-
if (n <= 10000)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
nrm2 = nrm2_compute(n, x, inc_x);
BLASLONG max_index = 0;
#if defined(SMP)
- nthreads = num_cpu_avail(1);
-
- if (inc_x == 0)
- nthreads = 1;
-
- if (n <= 10000)
+ if (inc_x == 0 || n <= 10000)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
max_index = iamax_compute(n, x, inc_x);
BLASLONG max_index = 0;
#if defined(SMP)
- nthreads = num_cpu_avail(1);
-
- if (inc_x == 0)
- nthreads = 1;
-
- if (n <= 10000)
+ if (inc_x == 0 || n <= 10000)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
max_index = izamax_compute(n, x, inc_x);
FLOAT asum = 0.0;
#if defined(SMP)
- nthreads = num_cpu_avail(1);
-
- if (inc_x == 0)
- nthreads = 1;
-
- if (n <= 10000)
+ if (inc_x == 0 || n <= 10000)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
asum = sasum_compute(n, x, inc_x);
if (n <= 0 || inc_x <= 0) return 0.0;
#if defined(SMP)
- nthreads = num_cpu_avail(1);
-
if (n <= 10000)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
nrm2_double = nrm2_compute(n, x, inc_x);
FLOAT asum = 0.0;
#if defined(SMP)
- nthreads = num_cpu_avail(1);
-
- if (inc_x == 0)
- nthreads = 1;
-
- if (n <= 10000)
+ if (inc_x == 0 || n <= 10000)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
asum = zasum_compute(n, x, inc_x);
CIMAG(zdot) = 0.0;
#if defined(SMP)
- nthreads = num_cpu_avail(1);
-
- if (inc_x == 0 || inc_y == 0)
- nthreads = 1;
-
- if (n <= 10000)
+ if (inc_x == 0 || inc_y == 0 || n <= 10000)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
zdot_compute(n, x, inc_x, y, inc_y, &zdot);
#include "common.h"
-#if defined(BULLDOZER)
+#if defined(BULLDOZER)
#include "ddot_microk_bulldozer-2.c"
#elif defined(STEAMROLLER) || defined(EXCAVATOR)
#include "ddot_microk_steamroller-2.c"
#elif defined(PILEDRIVER)
#include "ddot_microk_piledriver-2.c"
-#elif defined(NEHALEM)
+#elif defined(NEHALEM)
#include "ddot_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "ddot_microk_haswell-2.c"
FLOAT temp1 = 0.0;
FLOAT temp2 = 0.0;
- BLASLONG n1 = n & -4;
+ BLASLONG n1 = n & -4;
while(i < n1)
{
FLOAT dot = 0.0;
#if defined(SMP)
- nthreads = num_cpu_avail(1);
-
- if (inc_x == 0 || inc_y == 0)
- nthreads = 1;
-
- if (n <= 10000)
+ if (inc_x == 0 || inc_y == 0 || n <= 10000)
nthreads = 1;
+ else
+ nthreads = num_cpu_avail(1);
if (nthreads == 1) {
dot = dot_compute(n, x, inc_x, y, inc_y);