From b343a29dbcbfc5eaca11243ac603a1e5b48630f3 Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Thu, 28 Oct 2021 01:11:56 -0500 Subject: [PATCH] rs6000: Fix ICE of vect cost related to V1TI [PR102767] As PR102767 shows, the commit r12-3482 exposed an ICE in function rs6000_builtin_vectorization_cost. We claim V1TI supports movmisalign on rs6000 (see define_expand "movmisalign"), so it returns true in rs6000_builtin_support_vector_misalignment for misalign 8. Later in the cost querying function rs6000_builtin_vectorization_cost, we don't have the arms to handle the V1TI input under (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN). The proposed fix is to add handling for V1TI, simply giving it the same cost as doubleword. That cost is apparently bigger than the cost of scalar, so the vectorization won't happen; the change is just to keep consistency and avoid the ICE. Another thought is to not support movmisalign for V1TI, but it sounds like a bad idea since it doesn't match the reality. Note that this patch also fixes up the wrong indentation nearby. gcc/ChangeLog: PR target/102767 * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Consider V1TI mode for unaligned load and store. gcc/testsuite/ChangeLog: PR target/102767 * gcc.target/powerpc/ppc-fortran/pr102767.f90: New file. --- gcc/config/rs6000/rs6000.c | 64 +++++++++++----------- .../gcc.target/powerpc/ppc-fortran/pr102767.f90 | 21 +++++++ 2 files changed, 54 insertions(+), 31 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/ppc-fortran/pr102767.f90 diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 1dcb9b1..bee3fc8 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -5146,7 +5146,8 @@ rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) { elements = TYPE_VECTOR_SUBPARTS (vectype); - if (elements == 2) + /* See PR102767, consider V1TI to keep consistency. 
*/ + if (elements == 2 || elements == 1) /* Double word aligned. */ return 4; @@ -5183,39 +5184,40 @@ rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, if (TARGET_EFFICIENT_UNALIGNED_VSX) return 1; - if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) - { - elements = TYPE_VECTOR_SUBPARTS (vectype); - if (elements == 2) - /* Double word aligned. */ - return 2; + if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) + { + elements = TYPE_VECTOR_SUBPARTS (vectype); + /* See PR102767, consider V1TI to keep consistency. */ + if (elements == 2 || elements == 1) + /* Double word aligned. */ + return 2; - if (elements == 4) - { - switch (misalign) - { - case 8: - /* Double word aligned. */ - return 2; - - case -1: - /* Unknown misalignment. */ - case 4: - case 12: - /* Word aligned. */ - return 23; - - default: - gcc_unreachable (); - } - } - } + if (elements == 4) + { + switch (misalign) + { + case 8: + /* Double word aligned. */ + return 2; - if (TARGET_ALTIVEC) - /* Misaligned stores are not supported. */ - gcc_unreachable (); + case -1: + /* Unknown misalignment. */ + case 4: + case 12: + /* Word aligned. */ + return 23; - return 2; + default: + gcc_unreachable (); + } + } + } + + if (TARGET_ALTIVEC) + /* Misaligned stores are not supported. */ + gcc_unreachable (); + + return 2; case vec_construct: /* This is a rough approximation assuming non-constant elements diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fortran/pr102767.f90 b/gcc/testsuite/gcc.target/powerpc/ppc-fortran/pr102767.f90 new file mode 100644 index 0000000..6853cfe --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fortran/pr102767.f90 @@ -0,0 +1,21 @@ +! { dg-require-effective-target powerpc_vsx_ok } +! { dg-options "-mvsx -O2 -ftree-vectorize -mno-efficient-unaligned-vsx" } + +INTERFACE + FUNCTION elemental_mult (a, b, c) + type(*), DIMENSION(..) 
:: a, b, c + END +END INTERFACE + +allocatable z +integer, dimension(2,2) :: a, b +call test_CFI_address +contains + subroutine test_CFI_address + if (elemental_mult (z, x, y) .ne. 0) stop + a = reshape ([4,3,2,1], [2,2]) + b = reshape ([2,3,4,5], [2,2]) + if (elemental_mult (i, a, b) .ne. 0) stop + end +end + -- 2.7.4