for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize)
+
+#define NG 32
+#define NW 32
+#define VL 32
+#pragma acc parallel num_gangs(NG) num_workers(NW) vector_length(VL) \
+ copy(ary) copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
#pragma acc loop gang worker vector
else
ary[ix] = ix;
}
-
- gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
- workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
- vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
+ gangsize = NG;
+ workersize = NW;
+ vectorsize = VL;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN has an upper limit of 'num_workers(16)'. */
+ if (workersize > 16)
+ workersize = 16;
+ /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported. */
+ vectorsize = 1;
+#endif
for (ix = 0; ix < N; ix++)
{
int ary[N];
int ix;
int exit = 0;
- int gangsize = 0, workersize = 0, vectorsize = 0;
+ int gangsize, workersize, vectorsize;
int *gangdist, *workerdist, *vectordist;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(ary) copyout(gangsize, workersize, vectorsize)
+#define NG 32
+#define NW 32
+#define VL 32
+#pragma acc parallel num_gangs(NG) num_workers(NW) vector_length(VL) \
+ copy(ary)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
#pragma acc loop gang worker vector
ary[ix] = (g << 16) | (w << 8) | v;
}
-
- gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
- workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
- vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
+ gangsize = NG;
+ workersize = NW;
+ vectorsize = VL;
+#if defined ACC_DEVICE_TYPE_host
+ gangsize = 1;
+ workersize = 1;
+ vectorsize = 1;
+#elif defined ACC_DEVICE_TYPE_radeon
+ /* AMD GCN has an upper limit of 'num_workers(16)'. */
+ if (workersize > 16)
+ workersize = 16;
+ /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported. */
+ vectorsize = 1;
+#endif
gangdist = (int *) __builtin_alloca (gangsize * sizeof (int));
workerdist = (int *) __builtin_alloca (workersize * sizeof (int));
int w = (ary[ix] >> 8) & 255;
int v = ary[ix] & 255;
+ if (g >= gangsize
+ || w >= workersize
+ || v >= vectorsize)
+ __builtin_abort ();
+
gangdist[g]++;
workerdist[w]++;
vectordist[v]++;
int t = 0, h = 0;
int gangsize, workersize, vectorsize;
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(ondev) copyout(gangsize, workersize, vectorsize)
+#define NG 32
+#define NW 32
+#define VL 32
+#pragma acc parallel num_gangs(NG) num_workers(NW) vector_length(VL) \
+ copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
#pragma acc loop gang worker vector reduction(+:t)
}
t += val;
}
- gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
- workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
- vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
+ gangsize = NG;
+ workersize = NW;
+ vectorsize = VL;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN has an upper limit of 'num_workers(16)'. */
+ if (workersize > 16)
+ workersize = 16;
+ /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported. */
+ vectorsize = 1;
+#endif
for (ix = 0; ix < N; ix++)
{
int t = 0, h = 0;
int vectorsize;
-#pragma acc parallel vector_length(32) copy(ondev) copyout(vectorsize)
+#define VL 32
+#pragma acc parallel vector_length(VL) copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */
{
#pragma acc loop vector reduction (+:t)
}
t += val;
}
- vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
+ vectorsize = VL;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported. */
+ vectorsize = 1;
+#endif
for (ix = 0; ix < N; ix++)
{
int q = 0, h = 0;
int vectorsize;
-#pragma acc parallel vector_length(32) copy(q) copy(ondev) copyout(vectorsize)
+#define VL 32
+#pragma acc parallel vector_length(VL) copy(q) copy(ondev)
/* { dg-note {variable 't' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
t += val;
}
q = t;
- vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
+ vectorsize = VL;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported. */
+ vectorsize = 1;
+#endif
for (ix = 0; ix < N; ix++)
{
int t = 0, h = 0;
int workersize;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \
- copyout(workersize)
+#define NW 32
+#define VL 32
+#pragma acc parallel num_workers(NW) vector_length(VL) \
+ copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-3 } */
{
}
t += val;
}
- workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
+ workersize = NW;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN has an upper limit of 'num_workers(16)'. */
+ if (workersize > 16)
+ workersize = 16;
+#endif
for (ix = 0; ix < N; ix++)
{
int q = 0, h = 0;
int workersize;
-#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev) \
- copyout(workersize)
+#define NW 32
+#define VL 32
+#pragma acc parallel num_workers(NW) vector_length(VL) \
+ copy(q) copy(ondev)
/* { dg-note {variable 't' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */
/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-4 } */
t += val;
}
q = t;
- workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
+ workersize = NW;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN has an upper limit of 'num_workers(16)'. */
+ if (workersize > 16)
+ workersize = 16;
+#endif
for (ix = 0; ix < N; ix++)
{
int t = 0, h = 0;
int workersize, vectorsize;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \
- copyout(workersize, vectorsize)
+#define NW 32
+#define VL 32
+#pragma acc parallel num_workers(NW) vector_length(VL) \
+ copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
#pragma acc loop worker vector reduction (+:t)
}
t += val;
}
- workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
- vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
+ workersize = NW;
+ vectorsize = VL;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN has an upper limit of 'num_workers(16)'. */
+ if (workersize > 16)
+ workersize = 16;
+ /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported. */
+ vectorsize = 1;
+#endif
for (ix = 0; ix < N; ix++)
{
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \
- copyout(vectorsize)
+#define VL 32
+#pragma acc parallel vector_length(VL) \
+ copy(ary) copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
#pragma acc loop vector
else
ary[ix] = ix;
}
- vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
+ vectorsize = VL;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported. */
+ vectorsize = 1;
+#endif
for (ix = 0; ix < N; ix++)
{
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
- copyout(workersize)
+#define NW 32
+#define VL 32
+#pragma acc parallel num_workers(NW) vector_length(VL) \
+ copy(ary) copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } .-3 } */
{
else
ary[ix] = ix;
}
- workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
+ workersize = NW;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN has an upper limit of 'num_workers(16)'. */
+ if (workersize > 16)
+ workersize = 16;
+#endif
for (ix = 0; ix < N; ix++)
{
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
- copyout(workersize, vectorsize)
+#define NW 32
+#define VL 32
+#pragma acc parallel num_workers(NW) vector_length(VL) \
+ copy(ary) copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
#pragma acc loop worker vector
else
ary[ix] = ix;
}
- workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
- vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
+ workersize = NW;
+ vectorsize = VL;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN has an upper limit of 'num_workers(16)'. */
+ if (workersize > 16)
+ workersize = 16;
+ /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported. */
+ vectorsize = 1;
+#endif
for (ix = 0; ix < N; ix++)
{
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize)
+#define NG 32
+#define NW 32
+#define VL 32
+#pragma acc parallel num_gangs(NG) num_workers(NW) vector_length(VL) \
+ copy(ary) copy(ondev)
{
ondev = acc_on_device (acc_device_not_host);
gang (ary);
- gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
- workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
- vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
+ gangsize = NG;
+ workersize = NW;
+ vectorsize = VL;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN has an upper limit of 'num_workers(16)'. */
+ if (workersize > 16)
+ workersize = 16;
+ /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported. */
+ vectorsize = 1;
+#endif
for (ix = 0; ix < N; ix++)
{
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \
- copyout(vectorsize)
+#define VL 32
+#pragma acc parallel vector_length(VL) \
+ copy(ary) copy(ondev)
{
ondev = acc_on_device (acc_device_not_host);
vector (ary);
- vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
+ vectorsize = VL;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported. */
+ vectorsize = 1;
+#endif
for (ix = 0; ix < N; ix++)
{
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
- copyout(workersize)
+#define NW 32
+#define VL 32
+#pragma acc parallel num_workers(NW) vector_length(VL) \
+ copy(ary) copy(ondev)
{
ondev = acc_on_device (acc_device_not_host);
worker (ary);
- workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
+ workersize = NW;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN has an upper limit of 'num_workers(16)'. */
+ if (workersize > 16)
+ workersize = 16;
+#endif
for (ix = 0; ix < N; ix++)
{
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
- copyout(workersize, vectorsize)
+#define NW 32
+#define VL 32
+#pragma acc parallel num_workers(NW) vector_length(VL) \
+ copy(ary) copy(ondev)
{
ondev = acc_on_device (acc_device_not_host);
worker (ary);
- workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
- vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
+ workersize = NW;
+ vectorsize = VL;
+#ifdef ACC_DEVICE_TYPE_radeon
+ /* AMD GCN has an upper limit of 'num_workers(16)'. */
+ if (workersize > 16)
+ workersize = 16;
+ /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported. */
+ vectorsize = 1;
+#endif
for (ix = 0; ix < N; ix++)
{