From f303865e326d0ecc5c370cd7cc6336f8db44b7ee Mon Sep 17 00:00:00 2001 From: yroux Date: Thu, 4 Dec 2014 19:43:18 +0000 Subject: [PATCH] 2014-12-04 Yvan Roux Backport from trunk r217079, r217080. 2014-11-04 Alan Lawrence config/arm/neon.md (reduc_smin_<mode> *2): Rename to... (reduc_smin_scal_<mode> *2): ...this; extract scalar result. (reduc_smax_<mode> *2): Rename to... (reduc_smax_scal_<mode> *2): ...this; extract scalar result. (reduc_umin_<mode> *2): Rename to... (reduc_umin_scal_<mode> *2): ...this; extract scalar result. (reduc_umax_<mode> *2): Rename to... (reduc_umax_scal_<mode> *2): ...this; extract scalar result. 2014-11-04 Alan Lawrence config/arm/neon.md (reduc_plus_*): Rename to... (reduc_plus_scal_*): ...this; reduce to temp and extract scalar result. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@218398 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.linaro | 19 +++++++++ gcc/config/arm/neon.md | 112 ++++++++++++++++++++++++++----------------------- 2 files changed, 79 insertions(+), 52 deletions(-) diff --git a/gcc/ChangeLog.linaro b/gcc/ChangeLog.linaro index 9129086..2c32c3f 100644 --- a/gcc/ChangeLog.linaro +++ b/gcc/ChangeLog.linaro @@ -1,5 +1,24 @@ 2014-12-04 Yvan Roux + Backport from trunk r217079, r217080. + 2014-11-04 Alan Lawrence + + config/arm/neon.md (reduc_smin_<mode> *2): Rename to... + (reduc_smin_scal_<mode> *2): ...this; extract scalar result. + (reduc_smax_<mode> *2): Rename to... + (reduc_smax_scal_<mode> *2): ...this; extract scalar result. + (reduc_umin_<mode> *2): Rename to... + (reduc_umin_scal_<mode> *2): ...this; extract scalar result. + (reduc_umax_<mode> *2): Rename to... + (reduc_umax_scal_<mode> *2): ...this; extract scalar result. + + 2014-11-04 Alan Lawrence + + config/arm/neon.md (reduc_plus_*): Rename to... + (reduc_plus_scal_*): ...this; reduce to temp and extract scalar result. + +2014-12-04 Yvan Roux + Fix Backport from trunk r216524 (committed at r218379). 
Add missing file: config/aarch64/aarch64-cost-tables.h diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 4e8fa09..9540f3b 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1349,33 +1349,47 @@ ;; Reduction operations -(define_expand "reduc_splus_" - [(match_operand:VD 0 "s_register_operand" "") +(define_expand "reduc_plus_scal_" + [(match_operand: 0 "nonimmediate_operand" "") (match_operand:VD 1 "s_register_operand" "")] "TARGET_NEON && (! || flag_unsafe_math_optimizations)" { - neon_pairwise_reduce (operands[0], operands[1], mode, + rtx vec = gen_reg_rtx (mode); + neon_pairwise_reduce (vec, operands[1], mode, &gen_neon_vpadd_internal); + /* The same result is actually computed into every element. */ + emit_insn (gen_vec_extract (operands[0], vec, const0_rtx)); DONE; }) -(define_expand "reduc_splus_" - [(match_operand:VQ 0 "s_register_operand" "") +(define_expand "reduc_plus_scal_" + [(match_operand: 0 "nonimmediate_operand" "") (match_operand:VQ 1 "s_register_operand" "")] "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations) && !BYTES_BIG_ENDIAN" { rtx step1 = gen_reg_rtx (mode); - rtx res_d = gen_reg_rtx (mode); emit_insn (gen_quad_halves_plus (step1, operands[1])); - emit_insn (gen_reduc_splus_ (res_d, step1)); - emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + emit_insn (gen_reduc_plus_scal_ (operands[0], step1)); DONE; }) -(define_insn "reduc_splus_v2di" +(define_expand "reduc_plus_scal_v2di" + [(match_operand:DI 0 "nonimmediate_operand" "=w") + (match_operand:V2DI 1 "s_register_operand" "")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + rtx vec = gen_reg_rtx (V2DImode); + + emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1])); + emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx)); + + DONE; +}) + +(define_insn "arm_reduc_plus_internal_v2di" [(set (match_operand:V2DI 0 "s_register_operand" "=w") (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")] UNSPEC_VPADD))] @@ -1384,115 +1398,109 @@ [(set_attr "type" "neon_add_q")] ) -;; NEON does not distinguish between signed and unsigned addition except on -;; widening operations. -(define_expand "reduc_uplus_" - [(match_operand:VDQI 0 "s_register_operand" "") - (match_operand:VDQI 1 "s_register_operand" "")] - "TARGET_NEON && ( || !BYTES_BIG_ENDIAN)" -{ - emit_insn (gen_reduc_splus_ (operands[0], operands[1])); - DONE; -}) - -(define_expand "reduc_smin_" - [(match_operand:VD 0 "s_register_operand" "") +(define_expand "reduc_smin_scal_" + [(match_operand: 0 "nonimmediate_operand" "") (match_operand:VD 1 "s_register_operand" "")] "TARGET_NEON && (! || flag_unsafe_math_optimizations)" { - neon_pairwise_reduce (operands[0], operands[1], mode, + rtx vec = gen_reg_rtx (mode); + + neon_pairwise_reduce (vec, operands[1], mode, &gen_neon_vpsmin); + /* The result is computed into every element of the vector. 
*/ + emit_insn (gen_vec_extract (operands[0], vec, const0_rtx)); DONE; }) -(define_expand "reduc_smin_" - [(match_operand:VQ 0 "s_register_operand" "") +(define_expand "reduc_smin_scal_" + [(match_operand: 0 "nonimmediate_operand" "") (match_operand:VQ 1 "s_register_operand" "")] "TARGET_NEON && (! || flag_unsafe_math_optimizations) && !BYTES_BIG_ENDIAN" { rtx step1 = gen_reg_rtx (mode); - rtx res_d = gen_reg_rtx (mode); emit_insn (gen_quad_halves_smin (step1, operands[1])); - emit_insn (gen_reduc_smin_ (res_d, step1)); - emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + emit_insn (gen_reduc_smin_scal_ (operands[0], step1)); DONE; }) -(define_expand "reduc_smax_" - [(match_operand:VD 0 "s_register_operand" "") +(define_expand "reduc_smax_scal_" + [(match_operand: 0 "nonimmediate_operand" "") (match_operand:VD 1 "s_register_operand" "")] "TARGET_NEON && (! || flag_unsafe_math_optimizations)" { - neon_pairwise_reduce (operands[0], operands[1], mode, + rtx vec = gen_reg_rtx (mode); + neon_pairwise_reduce (vec, operands[1], mode, &gen_neon_vpsmax); + /* The result is computed into every element of the vector. */ + emit_insn (gen_vec_extract (operands[0], vec, const0_rtx)); DONE; }) -(define_expand "reduc_smax_" - [(match_operand:VQ 0 "s_register_operand" "") +(define_expand "reduc_smax_scal_" + [(match_operand: 0 "nonimmediate_operand" "") (match_operand:VQ 1 "s_register_operand" "")] "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations) && !BYTES_BIG_ENDIAN" { rtx step1 = gen_reg_rtx (mode); - rtx res_d = gen_reg_rtx (mode); emit_insn (gen_quad_halves_smax (step1, operands[1])); - emit_insn (gen_reduc_smax_ (res_d, step1)); - emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + emit_insn (gen_reduc_smax_scal_ (operands[0], step1)); DONE; }) -(define_expand "reduc_umin_" - [(match_operand:VDI 0 "s_register_operand" "") +(define_expand "reduc_umin_scal_" + [(match_operand: 0 "nonimmediate_operand" "") (match_operand:VDI 1 "s_register_operand" "")] "TARGET_NEON" { - neon_pairwise_reduce (operands[0], operands[1], mode, + rtx vec = gen_reg_rtx (mode); + neon_pairwise_reduce (vec, operands[1], mode, &gen_neon_vpumin); + /* The result is computed into every element of the vector. */ + emit_insn (gen_vec_extract (operands[0], vec, const0_rtx)); DONE; }) -(define_expand "reduc_umin_" - [(match_operand:VQI 0 "s_register_operand" "") +(define_expand "reduc_umin_scal_" + [(match_operand: 0 "nonimmediate_operand" "") (match_operand:VQI 1 "s_register_operand" "")] "TARGET_NEON && !BYTES_BIG_ENDIAN" { rtx step1 = gen_reg_rtx (mode); - rtx res_d = gen_reg_rtx (mode); emit_insn (gen_quad_halves_umin (step1, operands[1])); - emit_insn (gen_reduc_umin_ (res_d, step1)); - emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + emit_insn (gen_reduc_umin_scal_ (operands[0], step1)); DONE; }) -(define_expand "reduc_umax_" - [(match_operand:VDI 0 "s_register_operand" "") +(define_expand "reduc_umax_scal_" + [(match_operand: 0 "nonimmediate_operand" "") (match_operand:VDI 1 "s_register_operand" "")] "TARGET_NEON" { - neon_pairwise_reduce (operands[0], operands[1], mode, + rtx vec = gen_reg_rtx (mode); + neon_pairwise_reduce (vec, operands[1], mode, &gen_neon_vpumax); + /* The result is computed into every element of the vector. 
*/ + emit_insn (gen_vec_extract (operands[0], vec, const0_rtx)); DONE; }) -(define_expand "reduc_umax_" - [(match_operand:VQI 0 "s_register_operand" "") +(define_expand "reduc_umax_scal_" + [(match_operand: 0 "nonimmediate_operand" "") (match_operand:VQI 1 "s_register_operand" "")] "TARGET_NEON && !BYTES_BIG_ENDIAN" { rtx step1 = gen_reg_rtx (mode); - rtx res_d = gen_reg_rtx (mode); emit_insn (gen_quad_halves_umax (step1, operands[1])); - emit_insn (gen_reduc_umax_ (res_d, step1)); - emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + emit_insn (gen_reduc_umax_scal_ (operands[0], step1)); DONE; }) -- 2.7.4