From a152954ea4fee516e83b4f75a17818fbc8d555bb Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Wed, 19 Dec 2018 14:20:54 +0000 Subject: [PATCH] [nvptx] Commit passing pr85381-*.c test-cases Add pr85381-*.c test-cases that are already passing without the fix for PR85381. Built and reg-tested on x86_64 with nvptx accelerator. 2018-12-19 Tom de Vries * testsuite/libgomp.oacc-c-c++-common/pr85381-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/pr85381-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/pr85381-4.c: New test. From-SVN: r267268 --- libgomp/ChangeLog | 6 ++++ .../libgomp.oacc-c-c++-common/pr85381-2.c | 36 ++++++++++++++++++++++ .../libgomp.oacc-c-c++-common/pr85381-3.c | 35 +++++++++++++++++++++ .../libgomp.oacc-c-c++-common/pr85381-4.c | 27 ++++++++++++++++ 4 files changed, 104 insertions(+) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index ad0abb8..ae8801e 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,5 +1,11 @@ 2018-12-19 Tom de Vries + * testsuite/libgomp.oacc-c-c++-common/pr85381-2.c: New test. + * testsuite/libgomp.oacc-c-c++-common/pr85381-3.c: New test. + * testsuite/libgomp.oacc-c-c++-common/pr85381-4.c: New test. + +2018-12-19 Tom de Vries + * testsuite/lib/libgomp.exp: Add load_lib of scanoffloadrtl.exp. * testsuite/libgomp.oacc-c-c++-common/nvptx-merged-loop.c: Move from gcc/testsuite/gcc.dg/goacc. 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c new file mode 100644 index 0000000..6570c64 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c @@ -0,0 +1,36 @@ +/* { dg-additional-options "-save-temps" } */ +/* { dg-do run { target openacc_nvidia_accel_selected } } + { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */ + +int +main (void) +{ + int v1; + + #pragma acc parallel + #pragma acc loop worker + for (v1 = 0; v1 < 20; v1 += 2) + ; + + return 0; +} + +/* Todo: Both bar.syncs can be removed. + Atm we generate this dead code in between forked and joining: + + mov.u32 %r28, %ntid.y; + mov.u32 %r29, %tid.y; + add.u32 %r30, %r29, %r29; + setp.gt.s32 %r31, %r30, 19; + @%r31 bra $L2; + add.u32 %r25, %r28, %r28; + mov.u32 %r24, %r30; + $L3: + add.u32 %r24, %r24, %r25; + setp.le.s32 %r33, %r24, 19; + @%r33 bra $L3; + $L2: + + so the loop is not recognized as an empty loop (which we detect by seeing if + joining immediately follows forked). 
*/ +/* { dg-final { scan-assembler-times "bar.sync" 2 } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c new file mode 100644 index 0000000..c5d1c5a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c @@ -0,0 +1,35 @@ +/* { dg-additional-options "-save-temps -w" } */ +/* { dg-do run { target openacc_nvidia_accel_selected } } + { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */ + +int a; +#pragma acc declare create(a) + +#pragma acc routine vector +void __attribute__((noinline, noclone)) +foo_v (void) +{ + a = 1; +} + +#pragma acc routine worker +void __attribute__((noinline, noclone)) +foo_w (void) +{ + a = 2; +} + +int +main (void) +{ + + #pragma acc parallel + foo_v (); + + #pragma acc parallel + foo_w (); + + return 0; +} + +/* { dg-final { scan-assembler-not "bar.sync" } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c new file mode 100644 index 0000000..d955d79 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c @@ -0,0 +1,27 @@ +/* { dg-additional-options "-save-temps -w" } */ +/* { dg-do run { target openacc_nvidia_accel_selected } } + { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */ + +#define n 1024 + +int +main (void) +{ + #pragma acc parallel + { + #pragma acc loop worker + for (int i = 0; i < n; i++) + ; + + #pragma acc loop worker + for (int i = 0; i < n; i++) + ; + } + + return 0; +} + +/* Atm, %ntid.y is broadcast from one loop to the next, so there are 2 bar.syncs + for that (the other two are there for the same reason as in pr85381-2.c). + Todo: Recompute %ntid.y instead of broadcasting it. */ +/* { dg-final { scan-assembler-times "bar.sync" 4 } } */ -- 2.7.4