openmp: Relax handling of implicit map vs. existing device mappings
authorChung-Lin Tang <cltang@codesourcery.com>
Fri, 12 Nov 2021 12:29:00 +0000 (20:29 +0800)
committerChung-Lin Tang <cltang@codesourcery.com>
Fri, 12 Nov 2021 12:29:48 +0000 (20:29 +0800)
This patch implements relaxing the requirements when a map with the implicit
attribute encounters an overlapping existing map. As the OpenMP 5.0 spec
describes on page 320, lines 18-27 (and 5.1 spec, page 352, lines 13-22):

"If a single contiguous part of the original storage of a list item with an
 implicit data-mapping attribute has corresponding storage in the device data
 environment prior to a task encountering the construct that is associated with
 the map clause, only that part of the original storage will have corresponding
 storage in the device data environment as a result of the map clause."

2021-11-12  Chung-Lin Tang  <cltang@codesourcery.com>

include/ChangeLog:

* gomp-constants.h (GOMP_MAP_FLAG_SPECIAL_3): Define special bit macro.
(GOMP_MAP_IMPLICIT): New special map kind bits value.
(GOMP_MAP_FLAG_SPECIAL_BITS): Define helper mask for whole set of
special map kind bits.
(GOMP_MAP_IMPLICIT_P): New predicate macro for implicit map kinds.

gcc/ChangeLog:

* tree.h (OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P): New access macro for
'implicit' bit, using 'base.deprecated_flag' field of tree_node.
* tree-pretty-print.c (dump_omp_clause): Add support for printing
implicit attribute in tree dumping.
* gimplify.c (gimplify_adjust_omp_clauses_1):
Set OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P to 1 if map clause is implicitly
created.
(gimplify_adjust_omp_clauses): Adjust place of adding implicitly created
clauses, from simple append, to starting of list, after non-map clauses.
* omp-low.c (lower_omp_target): Add GOMP_MAP_IMPLICIT bits into kind
values passed to libgomp for implicit maps.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/target-implicit-map-1.c: New test.
* c-c++-common/goacc/combined-reduction.c: Adjust scan test pattern.
* c-c++-common/goacc/firstprivate-mappings-1.c: Likewise.
* c-c++-common/goacc/mdc-1.c: Likewise.
* g++.dg/goacc/firstprivate-mappings-1.C: Likewise.

libgomp/ChangeLog:

* target.c (gomp_map_vars_existing): Add 'bool implicit' parameter, add
implicit map handling to allow a "superset" existing map as valid case.
(get_kind): Adjust to filter out GOMP_MAP_IMPLICIT bits in return value.
(get_implicit): New function to extract implicit status.
(gomp_map_fields_existing): Adjust arguments in calls to
gomp_map_vars_existing, and add uses of get_implicit.
(gomp_map_vars_internal): Likewise.
* testsuite/libgomp.c-c++-common/target-implicit-map-1.c: New test.

12 files changed:
gcc/gimplify.c
gcc/omp-low.c
gcc/testsuite/c-c++-common/goacc/combined-reduction.c
gcc/testsuite/c-c++-common/goacc/firstprivate-mappings-1.c
gcc/testsuite/c-c++-common/goacc/mdc-1.c
gcc/testsuite/c-c++-common/gomp/target-implicit-map-1.c [new file with mode: 0644]
gcc/testsuite/g++.dg/goacc/firstprivate-mappings-1.C
gcc/tree-pretty-print.c
gcc/tree.h
include/gomp-constants.h
libgomp/target.c
libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-1.c [new file with mode: 0644]

index e5877c8..4e022d8 100644 (file)
@@ -10889,6 +10889,10 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data)
          gcc_unreachable ();
        }
       OMP_CLAUSE_SET_MAP_KIND (clause, kind);
+      /* Setting of the implicit flag for the runtime is currently disabled for
+        OpenACC.  */
+      if ((gimplify_omp_ctxp->region_type & ORT_ACC) == 0)
+       OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P (clause) = 1;
       if (DECL_SIZE (decl)
          && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
        {
@@ -11504,9 +11508,15 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
        list_p = &OMP_CLAUSE_CHAIN (c);
     }
 
-  /* Add in any implicit data sharing.  */
+  /* Add in any implicit data sharing.  Implicit clauses are added at the start
+     of the clause list, but after any non-map clauses.  */
   struct gimplify_adjust_omp_clauses_data data;
-  data.list_p = list_p;
+  tree *implicit_add_list_p = orig_list_p;
+  while (*implicit_add_list_p
+        && OMP_CLAUSE_CODE (*implicit_add_list_p) != OMP_CLAUSE_MAP)
+    implicit_add_list_p = &OMP_CLAUSE_CHAIN (*implicit_add_list_p);
+
+  data.list_p = implicit_add_list_p;
   data.pre_p = pre_p;
   splay_tree_foreach (ctx->variables, gimplify_adjust_omp_clauses_1, &data);
 
index 5b6aa30..63a47f6 100644 (file)
@@ -13168,6 +13168,19 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
                    else if (integer_nonzerop (s))
                      tkind_zero = tkind;
                  }
+               if (tkind_zero == tkind
+                   && OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P (c)
+                   && (((tkind & GOMP_MAP_FLAG_SPECIAL_BITS)
+                        & ~GOMP_MAP_IMPLICIT)
+                       == 0))
+                 {
+                   /* If this is an implicit map, and the GOMP_MAP_IMPLICIT
+                      bits are not interfered by other special bit encodings,
+                      then turn the GOMP_IMPLICIT_BIT flag on for the runtime
+                      to see.  */
+                   tkind |= GOMP_MAP_IMPLICIT;
+                   tkind_zero = tkind;
+                 }
                break;
              case OMP_CLAUSE_FIRSTPRIVATE:
                gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt));
index ecf23f5..74ab05b 100644 (file)
@@ -23,7 +23,7 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "omp target oacc_parallel reduction.+:v1. map.tofrom:v1" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "omp target oacc_parallel reduction.+:v1. firstprivate.n. map.tofrom:v1" 1 "gimple" } } */
 /* { dg-final { scan-tree-dump-times "acc loop reduction.+:v1. private.i." 1 "gimple" } } */
 /* { dg-final { scan-tree-dump-times "omp target oacc_kernels map.force_tofrom:n .len: 4.. map.force_tofrom:v1 .len: 4.." 1 "gimple" } } */
 /* { dg-final { scan-tree-dump-times "acc loop reduction.+:v1. private.i." 1 "gimple" } } */
index 7987bea..5134ef6 100644 (file)
@@ -419,12 +419,7 @@ vla (int array_li)
   copyout (array_so)
   /* The gimplifier has created an implicit 'firstprivate' clause for the array
      length.
-     { dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel map\(from:array_so \[len: 4\]\) firstprivate\(array_li.[0-9]+\)} omplower { target { ! c++ } } } }
-     { dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel map\(from:array_so \[len: 4\]\) firstprivate\(} omplower { target { c++ } } } }
-     (C++ computes an intermediate value, so can't scan for 'firstprivate(array_li)'.)  */
-  /* For C, non-LP64, the gimplifier has also created a mapping for the array
-     itself; PR90859.
-     { dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel map\(from:array_so \[len: 4\]\) firstprivate\(array_li.[0-9]+\) map\(tofrom:\(\*array.[0-9]+\) \[len: D\.[0-9]+\]\) map\(firstprivate:array \[pointer assign, bias: 0\]\) \[} omplower { target { c && { ! lp64 } } } } } */
+     { dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel firstprivate\(array_li.[0-9]+\) map\(from:array_so \[len: 4\]\) \[} omplower } } */
   {
     array_so = sizeof array;
   }
index c2b8dc6..0a123be 100644 (file)
@@ -45,7 +45,7 @@ t1 ()
 
 /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_data map.to:s .len: 32.." 1 "omplower" } } */
 /* { dg-final { scan-tree-dump-times "pragma omp target oacc_data map.tofrom:.z .len: 40.. map.struct:s .len: 1.. map.alloc:s.a .len: 8.. map.tofrom:._1 .len: 40.. map.attach:s.a .bias: 0.." 1 "omplower" } } */
-/* { dg-final { scan-tree-dump-times "pragma omp target oacc_parallel map.attach:s.e .bias: 0.. map.tofrom:s .len: 32" 1 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "pragma omp target oacc_parallel map.tofrom:s .len: 32.. map.attach:s.e .bias: 0.." 1 "omplower" } } */
 /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_data map.attach:a .bias: 0.." 1 "omplower" } } */
 /* { dg-final { scan-tree-dump-times "pragma omp target oacc_exit_data map.detach:a .bias: 0.." 1 "omplower" } } */
 /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_data map.to:a .len: 8.." 1 "omplower" } } */
diff --git a/gcc/testsuite/c-c++-common/gomp/target-implicit-map-1.c b/gcc/testsuite/c-c++-common/gomp/target-implicit-map-1.c
new file mode 100644 (file)
index 0000000..52944fd
--- /dev/null
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-tree-gimple" } */
+#ifdef __cplusplus
+extern "C"
+#else
+extern
+#endif
+void abort (void);
+
+int
+main (void)
+{
+  #define N 5
+  int array[N][N];
+
+  for (int i = 0; i < N; i++)
+    {
+      #pragma omp target enter data map(alloc: array[i:1][0:N])
+
+      #pragma omp target
+      for (int j = 0; j < N; j++)
+       array[i][j] = i * 10 + j;
+
+      #pragma omp target exit data map(from: array[i:1][0:N])
+    }
+
+  for (int i = 0; i < N; i++)
+    for (int j = 0; j < N; j++)
+      if (array[i][j] != i + j)
+       abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump {#pragma omp target enter data map\(alloc:array\[[^]]+\]\[0\] \[len: [0-9]+\]\)} "gimple" } } */
+
+/* { dg-final { scan-tree-dump {#pragma omp target num_teams.* firstprivate\(i\) map\(tofrom:array \[len: [0-9]+\]\[implicit\]\)} "gimple" } } */
+
+/* { dg-final { scan-tree-dump {#pragma omp target exit data map\(from:array\[[^]]+\]\[0\] \[len: [0-9]+\]\)} "gimple" } } */
index 1b1badb..99a3bd4 100644 (file)
@@ -416,7 +416,7 @@ vla (int &array_li)
   copyout (array_so)
   /* The gimplifier has created an implicit 'firstprivate' clause for the array
      length.
-     { dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel map\(from:array_so \[len: 4\]\) firstprivate\(} omplower } }
+     { dg-final { scan-tree-dump {(?n)#pragma omp target oacc_parallel firstprivate\([^)]+\) map\(from:array_so \[len: 4\]\)} omplower } }
      (C++ computes an intermediate value, so can't scan for 'firstprivate(array_li)'.)  */
   {
     array_so = sizeof array;
index f6383b9..fcc0796 100644 (file)
@@ -971,6 +971,9 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags)
                             spc, flags, false);
          pp_right_bracket (pp);
        }
+      if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_MAP
+         && OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P (clause))
+       pp_string (pp, "[implicit]");
       pp_right_paren (pp);
       break;
 
index 92c3d77..03719b1 100644 (file)
@@ -1689,6 +1689,11 @@ class auto_suppress_location_wrappers
    map clause.  */
 #define OMP_CLAUSE_MAP_IMPLICIT(NODE) \
   (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)->base.default_def_flag)
+/* Nonzero if this map clause is to be indicated to the runtime as 'implicit',
+   due to being created through implicit data-mapping rules in the middle-end.
+   NOTE: this is different than OMP_CLAUSE_MAP_IMPLICIT.  */
+#define OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P(NODE) \
+  (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)->base.deprecated_flag)
 
 /* True on an OMP_CLAUSE_USE_DEVICE_PTR with an OpenACC 'if_present'
    clause.  */
index ebd0801..3e42d71 100644 (file)
 #define GOMP_MAP_FLAG_SPECIAL_0                (1 << 2)
 #define GOMP_MAP_FLAG_SPECIAL_1                (1 << 3)
 #define GOMP_MAP_FLAG_SPECIAL_2                (1 << 4)
+#define GOMP_MAP_FLAG_SPECIAL_3                (1 << 5)
 #define GOMP_MAP_FLAG_SPECIAL_4                (1 << 6)
 #define GOMP_MAP_FLAG_SPECIAL          (GOMP_MAP_FLAG_SPECIAL_1 \
                                         | GOMP_MAP_FLAG_SPECIAL_0)
 #define GOMP_MAP_DEEP_COPY             (GOMP_MAP_FLAG_SPECIAL_4 \
                                         | GOMP_MAP_FLAG_SPECIAL_2)
+/* This value indicates the map was created implicitly according to
+   OpenMP rules.  */
+#define GOMP_MAP_IMPLICIT              (GOMP_MAP_FLAG_SPECIAL_3 \
+                                        | GOMP_MAP_FLAG_SPECIAL_4)
+/* Mask for entire set of special map kind bits.  */
+#define GOMP_MAP_FLAG_SPECIAL_BITS     (GOMP_MAP_FLAG_SPECIAL_0 \
+                                        | GOMP_MAP_FLAG_SPECIAL_1 \
+                                        | GOMP_MAP_FLAG_SPECIAL_2 \
+                                        | GOMP_MAP_FLAG_SPECIAL_3 \
+                                        | GOMP_MAP_FLAG_SPECIAL_4)
 /* Flag to force a specific behavior (or else, trigger a run-time error).  */
 #define GOMP_MAP_FLAG_FORCE            (1 << 7)
 
@@ -186,6 +197,9 @@ enum gomp_map_kind
 #define GOMP_MAP_ALWAYS_P(X) \
   (GOMP_MAP_ALWAYS_TO_P (X) || ((X) == GOMP_MAP_ALWAYS_FROM))
 
+#define GOMP_MAP_IMPLICIT_P(X) \
+  (((X) & GOMP_MAP_FLAG_SPECIAL_BITS) == GOMP_MAP_IMPLICIT)
+
 
 /* Asynchronous behavior.  Keep in sync with
    libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t.  */
index ecf09f9..3c1eee2 100644 (file)
@@ -539,7 +539,7 @@ static inline void
 gomp_map_vars_existing (struct gomp_device_descr *devicep,
                        struct goacc_asyncqueue *aq, splay_tree_key oldn,
                        splay_tree_key newn, struct target_var_desc *tgt_var,
-                       unsigned char kind, bool always_to_flag,
+                       unsigned char kind, bool always_to_flag, bool implicit,
                        struct gomp_coalesce_buf *cbuf,
                        htab_t *refcount_set)
 {
@@ -550,11 +550,22 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep,
   tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
   tgt_var->is_attach = false;
   tgt_var->offset = newn->host_start - oldn->host_start;
-  tgt_var->length = newn->host_end - newn->host_start;
+
+  /* For implicit maps, old contained in new is valid.  */
+  bool implicit_subset = (implicit
+                         && newn->host_start <= oldn->host_start
+                         && oldn->host_end <= newn->host_end);
+  if (implicit_subset)
+    tgt_var->length = oldn->host_end - oldn->host_start;
+  else
+    tgt_var->length = newn->host_end - newn->host_start;
 
   if ((kind & GOMP_MAP_FLAG_FORCE)
-      || oldn->host_start > newn->host_start
-      || oldn->host_end < newn->host_end)
+      /* For implicit maps, old contained in new is valid.  */
+      || !(implicit_subset
+          /* Otherwise, new contained inside old is considered valid.  */
+          || (oldn->host_start <= newn->host_start
+              && newn->host_end <= oldn->host_end)))
     {
       gomp_mutex_unlock (&devicep->lock);
       gomp_fatal ("Trying to map into device [%p..%p) object when "
@@ -564,11 +575,17 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep,
     }
 
   if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
-    gomp_copy_host2dev (devicep, aq,
-                       (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
-                                 + newn->host_start - oldn->host_start),
-                       (void *) newn->host_start,
-                       newn->host_end - newn->host_start, false, cbuf);
+    {
+      /* Implicit + always should not happen. If this does occur, below
+        address/length adjustment is a TODO.  */
+      assert (!implicit_subset);
+
+      gomp_copy_host2dev (devicep, aq,
+                         (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+                                   + newn->host_start - oldn->host_start),
+                         (void *) newn->host_start,
+                         newn->host_end - newn->host_start, false, cbuf);
+    }
 
   gomp_increment_refcount (oldn, refcount_set);
 }
@@ -576,8 +593,24 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep,
 static int
 get_kind (bool short_mapkind, void *kinds, int idx)
 {
-  return short_mapkind ? ((unsigned short *) kinds)[idx]
-                      : ((unsigned char *) kinds)[idx];
+  if (!short_mapkind)
+    return ((unsigned char *) kinds)[idx];
+
+  int val = ((unsigned short *) kinds)[idx];
+  if (GOMP_MAP_IMPLICIT_P (val))
+    val &= ~GOMP_MAP_IMPLICIT;
+  return val;
+}
+
+
+static bool
+get_implicit (bool short_mapkind, void *kinds, int idx)
+{
+  if (!short_mapkind)
+    return false;
+
+  int val = ((unsigned short *) kinds)[idx];
+  return GOMP_MAP_IMPLICIT_P (val);
 }
 
 static void
@@ -631,6 +664,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt,
   struct splay_tree_s *mem_map = &devicep->mem_map;
   struct splay_tree_key_s cur_node;
   int kind;
+  bool implicit;
   const bool short_mapkind = true;
   const int typemask = short_mapkind ? 0xff : 0x7;
 
@@ -638,12 +672,14 @@ gomp_map_fields_existing (struct target_mem_desc *tgt,
   cur_node.host_end = cur_node.host_start + sizes[i];
   splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
   kind = get_kind (short_mapkind, kinds, i);
+  implicit = get_implicit (short_mapkind, kinds, i);
   if (n2
       && n2->tgt == n->tgt
       && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
     {
       gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
-                             kind & typemask, false, cbuf, refcount_set);
+                             kind & typemask, false, implicit, cbuf,
+                             refcount_set);
       return;
     }
   if (sizes[i] == 0)
@@ -659,7 +695,8 @@ gomp_map_fields_existing (struct target_mem_desc *tgt,
                 == n2->tgt_offset - n->tgt_offset)
            {
              gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
-                                     kind & typemask, false, cbuf, refcount_set);
+                                     kind & typemask, false, implicit, cbuf,
+                                     refcount_set);
              return;
            }
        }
@@ -671,7 +708,8 @@ gomp_map_fields_existing (struct target_mem_desc *tgt,
          && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
        {
          gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
-                                 kind & typemask, false, cbuf, refcount_set);
+                                 kind & typemask, false, implicit, cbuf,
+                                 refcount_set);
          return;
        }
     }
@@ -903,6 +941,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
   for (i = 0; i < mapnum; i++)
     {
       int kind = get_kind (short_mapkind, kinds, i);
+      bool implicit = get_implicit (short_mapkind, kinds, i);
       if (hostaddrs[i] == NULL
          || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
        {
@@ -1085,8 +1124,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
                }
            }
          gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
-                                 kind & typemask, always_to_cnt > 0, NULL,
-                                 refcount_set);
+                                 kind & typemask, always_to_cnt > 0, implicit,
+                                 NULL, refcount_set);
          i += always_to_cnt;
        }
       else
@@ -1256,6 +1295,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
        else if (tgt->list[i].key == NULL)
          {
            int kind = get_kind (short_mapkind, kinds, i);
+           bool implicit = get_implicit (short_mapkind, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            switch (kind & typemask)
@@ -1415,7 +1455,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n && n->refcount != REFCOUNT_LINK)
              gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
-                                     kind & typemask, false, cbufp,
+                                     kind & typemask, false, implicit, cbufp,
                                      refcount_set);
            else
              {
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-1.c
new file mode 100644 (file)
index 0000000..f2e7293
--- /dev/null
@@ -0,0 +1,31 @@
+#ifdef __cplusplus
+extern "C"
+#else
+extern
+#endif
+void abort (void);
+
+int
+main (void)
+{
+  #define N 5
+  int array[N][N];
+
+  for (int i = 0; i < N; i++)
+    {
+      #pragma omp target enter data map(alloc: array[i:1][0:N])
+
+      #pragma omp target
+      for (int j = 0; j < N; j++)
+       array[i][j] = i + j;
+
+      #pragma omp target exit data map(from: array[i:1][0:N])
+    }
+
+  for (int i = 0; i < N; i++)
+    for (int j = 0; j < N; j++)
+      if (array[i][j] != i + j)
+       abort ();
+
+  return 0;
+}