re PR fortran/90539 (481.wrf slowdown by 25% on Intel Kaby with -Ofast -march=native...
authorThomas Koenig <tkoenig@gcc.gnu.org>
Wed, 29 May 2019 20:30:45 +0000 (20:30 +0000)
committerThomas Koenig <tkoenig@gcc.gnu.org>
Wed, 29 May 2019 20:30:45 +0000 (20:30 +0000)
2019-05-29  Thomas Koenig  <tkoenig@gcc.gnu.org>

PR fortran/90539
* gfortran.h (gfc_has_dimen_vector_ref): Add prototype.
* trans.h (gfc_conv_subref_array_arg): Add argument check_contiguous.
(gfc_conv_is_contiguous_expr): Add prototype.
* frontend-passes.c (has_dimen_vector_ref): Remove prototype,
rename to
(gfc_has_dimen_vector_ref): New function name.
(matmul_temp_args): Use gfc_has_dimen_vector_ref.
(inline_matmul_assign): Likewise.
* trans-array.c (gfc_conv_array_parameter): Also check for absence
of a vector subscript before calling gfc_conv_subref_array_arg.
Pass additional argument to gfc_conv_subref_array_arg.
* trans-expr.c (gfc_conv_subref_array_arg): Add argument
check_contiguous. If that is true, check if the argument
is contiguous and do not repack in that case.
* trans-intrinsic.c (gfc_conv_intrinsic_is_contiguous): Split
away most of the work into, and call
(gfc_conv_intrinsic_is_coniguous_expr): New function.

2019-05-29  Thomas Koenig  <tkoenig@gcc.gnu.org>

PR fortran/90539
* gfortran.dg/internal_pack_21.f90: Adjust scan patterns.
* gfortran.dg/internal_pack_22.f90: New test.
* gfortran.dg/internal_pack_23.f90: New test.

From-SVN: r271751

gcc/fortran/ChangeLog
gcc/fortran/frontend-passes.c
gcc/fortran/gfortran.h
gcc/fortran/trans-array.c
gcc/fortran/trans-expr.c
gcc/fortran/trans-intrinsic.c
gcc/fortran/trans.h
gcc/testsuite/ChangeLog
gcc/testsuite/gfortran.dg/internal_pack_21.f90
gcc/testsuite/gfortran.dg/internal_pack_22.f90 [new file with mode: 0644]
gcc/testsuite/gfortran.dg/internal_pack_23.f90 [new file with mode: 0644]

index 07b485b..4c126b7 100644 (file)
@@ -1,3 +1,24 @@
+2019-05-29  Thomas Koenig  <tkoenig@gcc.gnu.org>
+
+       PR fortran/90539
+       * gfortran.h (gfc_has_dimen_vector_ref): Add prototype.
+       * trans.h (gfc_conv_subref_array_arg): Add argument check_contiguous.
+       (gfc_conv_is_contiguous_expr): Add prototype.
+       * frontend-passes.c (has_dimen_vector_ref): Remove prototype,
+       rename to
+       (gfc_has_dimen_vector_ref): New function name.
+       (matmul_temp_args): Use gfc_has_dimen_vector_ref.
+       (inline_matmul_assign): Likewise.
+       * trans-array.c (gfc_conv_array_parameter): Also check for absence
+       of a vector subscript before calling gfc_conv_subref_array_arg.
+       Pass additional argument to gfc_conv_subref_array_arg.
+       * trans-expr.c (gfc_conv_subref_array_arg): Add argument
+       check_contiguous. If that is true, check if the argument
+       is contiguous and do not repack in that case.
+       * trans-intrinsic.c (gfc_conv_intrinsic_is_contiguous): Split
+       away most of the work into, and call
+       (gfc_conv_intrinsic_is_coniguous_expr): New function.
+
 2019-05-29  Jakub Jelinek  <jakub@redhat.com>
 
        PR fortran/90329
index d4264da..87df504 100644 (file)
@@ -54,7 +54,6 @@ static gfc_code * create_do_loop (gfc_expr *, gfc_expr *, gfc_expr *,
 static gfc_expr* check_conjg_transpose_variable (gfc_expr *, bool *,
                                                 bool *);
 static int call_external_blas (gfc_code **, int *, void *);
-static bool has_dimen_vector_ref (gfc_expr *);
 static int matmul_temp_args (gfc_code **, int *,void *data);
 static int index_interchange (gfc_code **, int*, void *);
 
@@ -2868,7 +2867,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
     {
       if (matrix_a->expr_type == EXPR_VARIABLE
          && (gfc_check_dependency (matrix_a, expr1, true)
-             || has_dimen_vector_ref (matrix_a)))
+             || gfc_has_dimen_vector_ref (matrix_a)))
        a_tmp = true;
     }
   else
@@ -2881,7 +2880,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
     {
       if (matrix_b->expr_type == EXPR_VARIABLE
          && (gfc_check_dependency (matrix_b, expr1, true)
-             || has_dimen_vector_ref (matrix_b)))
+             || gfc_has_dimen_vector_ref (matrix_b)))
        b_tmp = true;
     }
   else
@@ -3681,8 +3680,8 @@ scalarized_expr (gfc_expr *e_in, gfc_expr **index, int count_index)
 
 /* Helper function to check for a dimen vector as subscript.  */
 
-static bool
-has_dimen_vector_ref (gfc_expr *e)
+bool
+gfc_has_dimen_vector_ref (gfc_expr *e)
 {
   gfc_array_ref *ar;
   int i;
@@ -3838,8 +3837,8 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
   if (matrix_b == NULL)
     return 0;
 
-  if (has_dimen_vector_ref (expr1) || has_dimen_vector_ref (matrix_a)
-      || has_dimen_vector_ref (matrix_b))
+  if (gfc_has_dimen_vector_ref (expr1) || gfc_has_dimen_vector_ref (matrix_a)
+      || gfc_has_dimen_vector_ref (matrix_b))
     return 0;
 
   /* We do not handle data dependencies yet.  */
index 54987ac..798297b 100644 (file)
@@ -3535,6 +3535,7 @@ typedef int (*walk_expr_fn_t) (gfc_expr **, int *, void *);
 int gfc_dummy_code_callback (gfc_code **, int *, void *);
 int gfc_expr_walker (gfc_expr **, walk_expr_fn_t, void *);
 int gfc_code_walker (gfc_code **, walk_code_fn_t, walk_expr_fn_t, void *);
+bool gfc_has_dimen_vector_ref (gfc_expr *e);
 
 /* simplify.c */
 
index 9c96d89..56d534d 100644 (file)
@@ -8139,12 +8139,12 @@ gfc_conv_array_parameter (gfc_se * se, gfc_expr * expr, bool g77,
         optimizers.  */
 
       if (g77 && optimize && !optimize_size && expr->expr_type == EXPR_VARIABLE
-         && !is_pointer (expr) && (fsym == NULL
-                                   || fsym->ts.type != BT_ASSUMED))
+         && !is_pointer (expr) && ! gfc_has_dimen_vector_ref (expr)
+         && (fsym == NULL || fsym->ts.type != BT_ASSUMED))
        {
          gfc_conv_subref_array_arg (se, expr, g77,
                                     fsym ? fsym->attr.intent : INTENT_INOUT,
-                                    false, fsym, proc_name, sym);
+                                    false, fsym, proc_name, sym, true);
          return;
        }
 
index b7a8456..5183029 100644 (file)
@@ -4579,7 +4579,7 @@ void
 gfc_conv_subref_array_arg (gfc_se *se, gfc_expr * expr, int g77,
                           sym_intent intent, bool formal_ptr,
                           const gfc_symbol *fsym, const char *proc_name,
-                          gfc_symbol *sym)
+                          gfc_symbol *sym, bool check_contiguous)
 {
   gfc_se lse;
   gfc_se rse;
@@ -4602,7 +4602,7 @@ gfc_conv_subref_array_arg (gfc_se *se, gfc_expr * expr, int g77,
 
   pass_optional = fsym && fsym->attr.optional && sym && sym->attr.optional;
 
-  if (pass_optional)
+  if (pass_optional || check_contiguous)
     {
       gfc_init_se (&work_se, NULL);
       parmse = &work_se;
@@ -4880,50 +4880,136 @@ class_array_fcn:
   else
     parmse->expr = gfc_build_addr_expr (NULL_TREE, parmse->expr);
 
-  if (pass_optional)
+  /* Basically make this into
+     
+     if (present)
+       {
+        if (contiguous)
+          {
+            pointer = a;
+          }
+        else
+          {
+            parmse->pre();
+            pointer = parmse->expr;
+          }
+       }
+     else
+       pointer = NULL;
+
+     foo (pointer);
+     if (present && !contiguous)
+          se->post();
+
+     */
+
+  if (pass_optional || check_contiguous)
     {
-      tree present;
       tree type;
       stmtblock_t else_block;
       tree pre_stmts, post_stmts;
       tree pointer;
       tree else_stmt;
+      tree present_var = NULL_TREE;
+      tree cont_var = NULL_TREE;
+      tree post_cond;
 
-      /* Make this into
+      type = TREE_TYPE (parmse->expr);
+      pointer = gfc_create_var (type, "arg_ptr");
+
+      if (check_contiguous)
+       {
+         gfc_se cont_se, array_se;
+         stmtblock_t if_block, else_block;
+         tree if_stmt, else_stmt;
+
+         cont_var = gfc_create_var (boolean_type_node, "contiguous");
+
+         /* cont_var = is_contiguous (expr); .  */
+         gfc_init_se (&cont_se, parmse);
+         gfc_conv_is_contiguous_expr (&cont_se, expr);
+         gfc_add_block_to_block (&se->pre, &(&cont_se)->pre);
+         gfc_add_modify (&se->pre, cont_var, cont_se.expr);
+         gfc_add_block_to_block (&se->pre, &(&cont_se)->post);
+
+         /* arrayse->expr = descriptor of a.  */
+         gfc_init_se (&array_se, se);
+         gfc_conv_expr_descriptor (&array_se, expr);
+         gfc_add_block_to_block (&se->pre, &(&array_se)->pre);
+         gfc_add_block_to_block (&se->pre, &(&array_se)->post);
+
+         /* if_stmt = { pointer = &a[0]; } .  */
+         gfc_init_block (&if_block);
+         tmp = gfc_conv_array_data (array_se.expr);
+         tmp = fold_convert (type, tmp);
+         gfc_add_modify (&if_block, pointer, tmp);
+         if_stmt = gfc_finish_block (&if_block);
+
+         /* else_stmt = { parmse->pre(); pointer = parmse->expr; } .  */
+         gfc_init_block (&else_block);
+         gfc_add_block_to_block (&else_block, &parmse->pre);
+         gfc_add_modify (&else_block, pointer, parmse->expr);
+         else_stmt = gfc_finish_block (&else_block);
+
+         /* And put the above into an if statement.  */
+         pre_stmts = fold_build3_loc (input_location, COND_EXPR, void_type_node,
+                                     cont_var, if_stmt, else_stmt);
+       }
+      else
+       {
+         /* pointer = pramse->expr;  .  */
+         gfc_add_modify (&parmse->pre, pointer, parmse->expr);
+         pre_stmts = gfc_finish_block (&parmse->pre);
+       }
 
-        if (present (a))
-          {
-             parmse->pre;
-             optional = parse->expr;
-          }
-         else
-          optional = NULL;
-         call foo (optional);
-         if (present (a))
-            parmse->post;
+      if (pass_optional)
+       {
+         present_var = gfc_create_var (boolean_type_node, "present");
 
-      */
+         /* present_var = present(sym); .  */
+         tmp = gfc_conv_expr_present (sym);
+         tmp = fold_convert (boolean_type_node, tmp);
+         gfc_add_modify (&se->pre, present_var, tmp);
 
-      type = TREE_TYPE (parmse->expr);
-      pointer = gfc_create_var (type, "optional");
-      tmp = gfc_conv_expr_present (sym);
-      present = gfc_evaluate_now (tmp, &se->pre);
-      gfc_add_modify (&parmse->pre, pointer, parmse->expr);
-      pre_stmts = gfc_finish_block (&parmse->pre);
-
-      gfc_init_block (&else_block);
-      gfc_add_modify (&else_block, pointer, build_int_cst (type, 0));
-      else_stmt = gfc_finish_block (&else_block);
-
-      tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present,
-                            pre_stmts, else_stmt);
-      gfc_add_expr_to_block (&se->pre, tmp);
+         /* else_stmt = { pointer = NULL; } .  */
+         gfc_init_block (&else_block);
+         gfc_add_modify (&else_block, pointer, build_int_cst (type, 0));
+         else_stmt = gfc_finish_block (&else_block);
+
+         tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present_var,
+                                pre_stmts, else_stmt);
+         gfc_add_expr_to_block (&se->pre, tmp);
+
+
+       }
+      else
+       gfc_add_expr_to_block (&se->pre, pre_stmts);
 
       post_stmts = gfc_finish_block (&parmse->post);
-      tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present,
+
+      /* Put together the post stuff, plus the optional
+        deallocation.  */
+      if (check_contiguous)
+       {
+         /* !cont_var.  */
+         tmp = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node,
+                                cont_var,
+                                build_zero_cst (boolean_type_node));
+         if (pass_optional)
+           post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR,
+                                        boolean_type_node, present_var, tmp);
+         else
+           post_cond = tmp;
+       }
+      else
+       {
+         gcc_assert (pass_optional);
+         post_cond = present_var;
+       }
+
+      tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, post_cond,
                             post_stmts, build_empty_stmt (input_location));
       gfc_add_expr_to_block (&se->post, tmp);
-
       se->expr = pointer;
     }
 
index e0a4c67..f6edd68 100644 (file)
@@ -2832,6 +2832,17 @@ static void
 gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr)
 {
   gfc_expr *arg;
+  arg = expr->value.function.actual->expr;
+  gfc_conv_is_contiguous_expr (se, arg);
+  se->expr = fold_convert (gfc_typenode_for_spec (&expr->ts), se->expr);
+}
+
+/* This function does the work for gfc_conv_intrinsic_is_contiguous,
+   plus it can be called directly.  */
+
+void
+gfc_conv_is_contiguous_expr (gfc_se *se, gfc_expr *arg)
+{
   gfc_ss *ss;
   gfc_se argse;
   tree desc, tmp, stride, extent, cond;
@@ -2839,8 +2850,6 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr)
   tree fncall0;
   gfc_array_spec *as;
 
-  arg = expr->value.function.actual->expr;
-
   if (arg->ts.type == BT_CLASS)
     gfc_add_class_array_ref (arg);
 
@@ -2878,7 +2887,7 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr)
       cond = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node,
                              stride, build_int_cst (TREE_TYPE (stride), 1));
 
-      for (i = 0; i < expr->value.function.actual->expr->rank - 1; i++)
+      for (i = 0; i < arg->rank - 1; i++)
        {
          tmp = gfc_conv_descriptor_lbound_get (desc, gfc_rank_cst[i]);
          extent = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[i]);
@@ -2896,7 +2905,7 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr)
          cond = fold_build2_loc (input_location, TRUTH_AND_EXPR,
                                  boolean_type_node, cond, tmp);
        }
-      se->expr = convert (gfc_typenode_for_spec (&expr->ts), cond);
+      se->expr = cond;
     }
 }
 
index e0118ab..0305d33 100644 (file)
@@ -535,7 +535,10 @@ int gfc_conv_procedure_call (gfc_se *, gfc_symbol *, gfc_actual_arglist *,
 void gfc_conv_subref_array_arg (gfc_se *, gfc_expr *, int, sym_intent, bool,
                                const gfc_symbol *fsym = NULL,
                                const char *proc_name = NULL,
-                               gfc_symbol *sym = NULL);
+                               gfc_symbol *sym = NULL,
+                               bool check_contiguous = false);
+
+void gfc_conv_is_contiguous_expr (gfc_se *, gfc_expr *);
 
 /* Generate code for a scalar assignment.  */
 tree gfc_trans_scalar_assign (gfc_se *, gfc_se *, gfc_typespec, bool, bool,
index bb500f0..a0e8b7b 100644 (file)
@@ -1,3 +1,10 @@
+2019-05-29  Thomas Koenig  <tkoenig@gcc.gnu.org>
+
+       PR fortran/90539
+       * gfortran.dg/internal_pack_21.f90: Adjust scan patterns.
+       * gfortran.dg/internal_pack_22.f90: New test.
+       * gfortran.dg/internal_pack_23.f90: New test.
+
 2019-05-29  Jan Hubicka  <hubicka@ucw.cz>
 
        * tree-ssa/alias-access-spath-1.c: new testcase.
index d0ce942..54e43ff 100644 (file)
@@ -20,5 +20,5 @@ END MODULE M1
 USE M1
 CALL S2()
 END
-! { dg-final { scan-tree-dump-times "optional" 4 "original" } }
+! { dg-final { scan-tree-dump-times "arg_ptr" 5 "original" } }
 ! { dg-final { scan-tree-dump-not "_gfortran_internal_unpack" "original" } }
diff --git a/gcc/testsuite/gfortran.dg/internal_pack_22.f90 b/gcc/testsuite/gfortran.dg/internal_pack_22.f90
new file mode 100644 (file)
index 0000000..4e9fe59
--- /dev/null
@@ -0,0 +1,36 @@
+! { dg-do run }
+! { dg-additional-options "-fdump-tree-original -O" }
+! Check that absent and present dummy arguments work with
+! packing when handing them down to an old-fashioned argument.
+
+module x
+  implicit none
+contains
+  subroutine foo (a,b)
+    real, dimension(:), intent(inout), optional :: a, b
+    if (present(a)) stop 1
+    if (.not. present(b)) stop 2
+    call bar (a, b)
+  end subroutine foo
+
+  subroutine bar (a,b)
+    real, dimension(2), intent(inout), optional :: a, b
+    real :: tmp
+    if (present(a)) stop 3
+    if (.not. present(b)) stop 4
+    tmp = b(2)
+    b(2) = b(1)
+    b(1) = tmp
+  end subroutine bar
+end module x
+
+program main
+  use x
+  implicit none
+  real, dimension(2) :: b
+  b(1) = 1.
+  b(2) = 42.
+  call foo(b=b)
+  if (b(1) /= 42. .or. b(2)  /= 1.) stop 5
+end program main
+! { dg-final { scan-tree-dump-not "_gfortran_internal_unpack" "original" } }
diff --git a/gcc/testsuite/gfortran.dg/internal_pack_23.f90 b/gcc/testsuite/gfortran.dg/internal_pack_23.f90
new file mode 100644 (file)
index 0000000..8df82c8
--- /dev/null
@@ -0,0 +1,27 @@
+! { dg-do run }
+! PR fortran/90539 - this used to cause an ICE.
+
+module t2
+  implicit none
+contains
+  subroutine foo(a)
+    real, dimension(*) :: a
+    if (a(1) /= 1.0 .or. a(2) /= 2.0) stop 1
+  end subroutine foo
+end module t2
+
+module t1
+  use t2
+  implicit none
+contains
+  subroutine bar(a)
+    real, dimension(:) :: a
+    if (a(1) /= 1.0 .or. a(2) /= 2.0) stop 1
+    call foo(a)
+  end subroutine bar
+end module t1
+
+program main
+  use t1
+  call bar([1.0, 2.0])
+end program main