From d89f355e2df9b99f2d37c0c2820414d9da848661 Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Mon, 27 Mar 2017 19:19:00 +0000 Subject: [PATCH] re PR target/78543 (ICE in push_reload, at reload.c:1349 on powerpc64le-linux-gnu) [gcc] 2017-03-27 Michael Meissner PR target/78543 * config/rs6000/rs6000.md (bswaphi2_extenddi): Combine bswap HImode and SImode with zero extend to DImode to one insn. (bswap<mode>2_extenddi): Likewise. (bswapsi2_extenddi): Likewise. (bswaphi2_extendsi): Likewise. (bswaphi2): Combine bswap HImode and SImode into one insn. Separate memory insns from swapping register. (bswapsi2): Likewise. (bswap<mode>2): Likewise. (bswaphi2_internal): Delete, no longer used. (bswapsi2_internal): Likewise. (bswap<mode>2_load): Split bswap HImode/SImode into separate load, store, and gpr<-gpr swap insns. (bswap<mode>2_store): Likewise. (bswaphi2_reg): Register only splitter, combine with the splitter. (bswaphi2 splitter): Likewise. (bswapsi2_reg): Likewise. (bswapsi2 splitter): Likewise. (bswapdi2): If we have the LDBRX and STDBRX instructions, split the insns into load, store, and register/register insns. (bswapdi2_ldbrx): Likewise. (bswapdi2_load): Likewise. (bswapdi2_store): Likewise. (bswapdi2_reg): Likewise. [gcc/testsuite] 2017-03-27 Michael Meissner PR target/78543 * gcc.target/powerpc/pr78543.c: New test. From-SVN: r246508 --- gcc/ChangeLog | 28 +++++ gcc/config/rs6000/rs6000.md | 176 ++++++++++++++++------------- gcc/testsuite/ChangeLog | 5 + gcc/testsuite/gcc.target/powerpc/pr78543.c | 60 ++++++++++ 4 files changed, 189 insertions(+), 80 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr78543.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d219b8c..37a904f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,31 @@ +2017-03-27 Michael Meissner + + PR target/78543 + * config/rs6000/rs6000.md (bswaphi2_extenddi): Combine bswap + HImode and SImode with zero extend to DImode to one insn. + (bswap<mode>2_extenddi): Likewise. 
+ (bswapsi2_extenddi): Likewise. + (bswaphi2_extendsi): Likewise. + (bswaphi2): Combine bswap HImode and SImode into one insn. + Separate memory insns from swapping register. + (bswapsi2): Likewise. + (bswap<mode>2): Likewise. + (bswaphi2_internal): Delete, no longer used. + (bswapsi2_internal): Likewise. + (bswap<mode>2_load): Split bswap HImode/SImode into separate load, + store, and gpr<-gpr swap insns. + (bswap<mode>2_store): Likewise. + (bswaphi2_reg): Register only splitter, combine with the splitter. + (bswaphi2 splitter): Likewise. + (bswapsi2_reg): Likewise. + (bswapsi2 splitter): Likewise. + (bswapdi2): If we have the LDBRX and STDBRX instructions, split + the insns into load, store, and register/register insns. + (bswapdi2_ldbrx): Likewise. + (bswapdi2_load): Likewise. + (bswapdi2_store): Likewise. + (bswapdi2_reg): Likewise. + 2017-03-27 Gunther Nikl * system.h (HAVE_DESIGNATED_INITIALIZERS): Fix non C++ case. diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 802e257..d1da804 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -2350,12 +2350,12 @@ ;; Since the hardware zeros the upper part of the register, save generating the ;; AND immediate if we are converting to unsigned -(define_insn "*bswaphi2_extenddi" +(define_insn "*bswap<mode>2_extenddi" [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (zero_extend:DI - (bswap:HI (match_operand:HI 1 "memory_operand" "Z"))))] + (bswap:HSI (match_operand:HSI 1 "memory_operand" "Z"))))] "TARGET_POWERPC64" - "lhbrx %0,%y1" + "l<wd>brx %0,%y1" [(set_attr "length" "4") (set_attr "type" "load")]) @@ -2368,34 +2368,52 @@ [(set_attr "length" "4") (set_attr "type" "load")]) -(define_expand "bswaphi2" - [(parallel [(set (match_operand:HI 0 "reg_or_mem_operand" "") - (bswap:HI - (match_operand:HI 1 "reg_or_mem_operand" ""))) - (clobber (match_scratch:SI 2 ""))])] +;; Separate the bswap patterns into load, store, and gpr<-gpr. 
This prevents +;; the register allocator from converting a gpr<-gpr swap into a store and then +;; load with byte swap, which can be slower than doing it in the registers. It +;; also prevents certain failures with the RELOAD register allocator. + +(define_expand "bswap<mode>2" + [(use (match_operand:HSI 0 "reg_or_mem_operand")) + (use (match_operand:HSI 1 "reg_or_mem_operand"))] "" { - if (!REG_P (operands[0]) && !REG_P (operands[1])) - operands[1] = force_reg (HImode, operands[1]); + rtx dest = operands[0]; + rtx src = operands[1]; + + if (!REG_P (dest) && !REG_P (src)) + src = force_reg (<MODE>mode, src); + + if (MEM_P (src)) + emit_insn (gen_bswap<mode>2_load (dest, src)); + else if (MEM_P (dest)) + emit_insn (gen_bswap<mode>2_store (dest, src)); + else + emit_insn (gen_bswap<mode>2_reg (dest, src)); + DONE; }) -(define_insn "bswaphi2_internal" - [(set (match_operand:HI 0 "reg_or_mem_operand" "=r,Z,&r") - (bswap:HI - (match_operand:HI 1 "reg_or_mem_operand" "Z,r,r"))) - (clobber (match_scratch:SI 2 "=X,X,&r"))] +(define_insn "bswap<mode>2_load" + [(set (match_operand:HSI 0 "gpc_reg_operand" "=r") + (bswap:HSI (match_operand:HSI 1 "memory_operand" "Z")))] "" - "@ - lhbrx %0,%y1 - sthbrx %1,%y0 - #" - [(set_attr "length" "4,4,12") - (set_attr "type" "load,store,*")]) + "l<wd>brx %0,%y1" + [(set_attr "type" "load")]) -(define_split - [(set (match_operand:HI 0 "gpc_reg_operand" "") - (bswap:HI (match_operand:HI 1 "gpc_reg_operand" ""))) - (clobber (match_operand:SI 2 "gpc_reg_operand" ""))] +(define_insn "bswap<mode>2_store" + [(set (match_operand:HSI 0 "memory_operand" "=Z") + (bswap:HSI (match_operand:HSI 1 "gpc_reg_operand" "r")))] + "" + "st<wd>brx %1,%y0" + [(set_attr "type" "store")]) + +(define_insn_and_split "bswaphi2_reg" + [(set (match_operand:HI 0 "gpc_reg_operand" "=&r") + (bswap:HI + (match_operand:HI 1 "gpc_reg_operand" "r"))) + (clobber (match_scratch:SI 2 "=&r"))] + "" + "#" "reload_completed" [(set (match_dup 3) (and:SI (lshiftrt:SI (match_dup 4) @@ -2408,48 +2426,21 @@ (set (match_dup 3) (ior:SI 
(match_dup 3) (match_dup 2)))] - " { operands[3] = simplify_gen_subreg (SImode, operands[0], HImode, 0); operands[4] = simplify_gen_subreg (SImode, operands[1], HImode, 0); -}") - -(define_insn "*bswapsi2_extenddi" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r") - (zero_extend:DI - (bswap:SI (match_operand:SI 1 "memory_operand" "Z"))))] - "TARGET_POWERPC64" - "lwbrx %0,%y1" - [(set_attr "length" "4") - (set_attr "type" "load")]) - -(define_expand "bswapsi2" - [(set (match_operand:SI 0 "reg_or_mem_operand" "") - (bswap:SI - (match_operand:SI 1 "reg_or_mem_operand" "")))] - "" -{ - if (!REG_P (operands[0]) && !REG_P (operands[1])) - operands[1] = force_reg (SImode, operands[1]); -}) - -(define_insn "*bswapsi2_internal" - [(set (match_operand:SI 0 "reg_or_mem_operand" "=r,Z,&r") - (bswap:SI - (match_operand:SI 1 "reg_or_mem_operand" "Z,r,r")))] - "" - "@ - lwbrx %0,%y1 - stwbrx %1,%y0 - #" - [(set_attr "length" "4,4,12") - (set_attr "type" "load,store,*")]) +} + [(set_attr "length" "12") + (set_attr "type" "*")]) ;; We are always BITS_BIG_ENDIAN, so the bit positions below in ;; zero_extract insns do not change for -mlittle. -(define_split - [(set (match_operand:SI 0 "gpc_reg_operand" "") - (bswap:SI (match_operand:SI 1 "gpc_reg_operand" "")))] +(define_insn_and_split "bswapsi2_reg" + [(set (match_operand:SI 0 "gpc_reg_operand" "=&r") + (bswap:SI + (match_operand:SI 1 "gpc_reg_operand" "r")))] + "" + "#" "reload_completed" [(set (match_dup 0) ; DABC (rotate:SI (match_dup 1) @@ -2465,11 +2456,13 @@ (const_int 24)) (const_int 255)) (and:SI (match_dup 0) - (const_int -256)))) - - ] + (const_int -256))))] "") +;; On systems with LDBRX/STDBRX generate the loads/stores directly, just like +;; we do for L{H,W}BRX and ST{H,W}BRX above. If not, we have to generate more +;; complex code. 
+ (define_expand "bswapdi2" [(parallel [(set (match_operand:DI 0 "reg_or_mem_operand" "") (bswap:DI @@ -2478,33 +2471,56 @@ (clobber (match_scratch:DI 3 ""))])] "" { - if (!REG_P (operands[0]) && !REG_P (operands[1])) - operands[1] = force_reg (DImode, operands[1]); + rtx dest = operands[0]; + rtx src = operands[1]; + + if (!REG_P (dest) && !REG_P (src)) + operands[1] = src = force_reg (DImode, src); + + if (TARGET_POWERPC64 && TARGET_LDBRX) + { + if (MEM_P (src)) + emit_insn (gen_bswapdi2_load (dest, src)); + else if (MEM_P (dest)) + emit_insn (gen_bswapdi2_store (dest, src)); + else + emit_insn (gen_bswapdi2_reg (dest, src)); + DONE; + } if (!TARGET_POWERPC64) { /* 32-bit mode needs fewer scratch registers, but 32-bit addressing mode that uses 64-bit registers needs the same scratch registers as 64-bit mode. */ - emit_insn (gen_bswapdi2_32bit (operands[0], operands[1])); + emit_insn (gen_bswapdi2_32bit (dest, src)); DONE; } }) ;; Power7/cell has ldbrx/stdbrx, so use it directly -(define_insn "*bswapdi2_ldbrx" - [(set (match_operand:DI 0 "reg_or_mem_operand" "=r,Z,&r") - (bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r"))) - (clobber (match_scratch:DI 2 "=X,X,&r")) - (clobber (match_scratch:DI 3 "=X,X,&r"))] - "TARGET_POWERPC64 && TARGET_LDBRX - && (REG_P (operands[0]) || REG_P (operands[1]))" - "@ - ldbrx %0,%y1 - stdbrx %1,%y0 - #" - [(set_attr "length" "4,4,36") - (set_attr "type" "load,store,*")]) +(define_insn "bswapdi2_load" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (bswap:DI (match_operand:DI 1 "memory_operand" "Z")))] + "TARGET_POWERPC64 && TARGET_LDBRX" + "ldbrx %0,%y1" + [(set_attr "type" "load")]) + +(define_insn "bswapdi2_store" + [(set (match_operand:DI 0 "memory_operand" "=Z") + (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r")))] + "TARGET_POWERPC64 && TARGET_LDBRX" + "stdbrx %1,%y0" + [(set_attr "type" "store")]) + +(define_insn "bswapdi2_reg" + [(set (match_operand:DI 0 "gpc_reg_operand" "=&r") + (bswap:DI (match_operand:DI 
1 "gpc_reg_operand" "r"))) + (clobber (match_scratch:DI 2 "=&r")) + (clobber (match_scratch:DI 3 "=&r"))] + "TARGET_POWERPC64 && TARGET_LDBRX" + "#" + [(set_attr "length" "36")]) ;; Non-power7/cell, fall back to use lwbrx/stwbrx (define_insn "*bswapdi2_64bit" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a6b7a7d..8cac7ec 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-03-27 Michael Meissner + + PR target/78543 + * gcc.target/powerpc/pr78543.c: New test. + 2017-03-27 Kelvin Nilsen PR target/80103 diff --git a/gcc/testsuite/gcc.target/powerpc/pr78543.c b/gcc/testsuite/gcc.target/powerpc/pr78543.c new file mode 100644 index 0000000..0421344 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr78543.c @@ -0,0 +1,60 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-mcpu=power8 -O1 -mno-lra" } */ + +typedef long a; +enum c { e, f, g, h, i, ab } j(); +int l, n, o, p; +a q, r; +void *memcpy(); +void b(); +static int k(int *s) { + int m; + if (j(&m)) + *s = m; + return !0; +} +void d(char s) { + int af[4]; + int ag; + enum c ah; + char ai[24 << 11]; + unsigned aj; + if (!k(&aj)) + goto ak; + for (;;) { + if (!k(&ag)) + goto ak; + switch (ah) { + case e: + b(""); + b("bad length %d for GUID in fileinfo v%u for \"%s\""); + case i: + b("bad length %d for TTH in fileinfo v%u for \"%s\"", aj); + case ab: + if (ag % 24) + b("for \"%s\"", s); + case f: + if (20 == ag) + case h: + if (20 == ag) + o = 0; + break; + case g: + memcpy(af, ai, sizeof af); + b(); + if (p) { + a al, am; + r = al << 2 | am; + n = af[2]; + al = n; + l = __builtin_bswap32(af[3]); + am = q = n | l; + } + default: + b("%s0 unhandled field ID %u 0", __func__); + } + } +ak:; +} -- 2.7.4