From 03a7bf5ab51e5871927595ed1f9d63e34ed19dc5 Mon Sep 17 00:00:00 2001 From: ktkachov Date: Tue, 10 Jun 2014 14:26:20 +0000 Subject: [PATCH] [AArch64] Add a big-endian lane flip at expand-time in saturating math patterns. * config/aarch64/aarch64-simd.md (aarch64_sqdmulh_lane): New expander. (aarch64_sqrdmulh_lane): Likewise. (aarch64_sqdmulh_lane): Rename to... (aarch64_sqdmulh_lane_internal): ...this. (aarch64_sqdmulh_laneq): New expander. (aarch64_sqrdmulh_laneq): Likewise. (aarch64_sqdmulh_laneq): Rename to... (aarch64_sqdmulh_laneq_internal): ...this. (aarch64_sqdmulh_lane): New expander. (aarch64_sqrdmulh_lane): Likewise. (aarch64_sqdmulh_lane): Rename to... (aarch64_sqdmulh_lane_internal): ...this. (aarch64_sqdmlal_lane): Add lane flip for big-endian. (aarch64_sqdmlal_laneq): Likewise. (aarch64_sqdmlsl_lane): Likewise. (aarch64_sqdmlsl_laneq): Likewise. (aarch64_sqdmlal2_lane): Likewise. (aarch64_sqdmlal2_laneq): Likewise. (aarch64_sqdmlsl2_lane): Likewise. (aarch64_sqdmlsl2_laneq): Likewise. (aarch64_sqdmull_lane): Likewise. (aarch64_sqdmull_laneq): Likewise. (aarch64_sqdmull2_lane): Likewise. (aarch64_sqdmull2_laneq): Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@211414 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 28 +++++++++ gcc/config/aarch64/aarch64-simd.md | 122 +++++++++++++++++++++++++++++++++++-- 2 files changed, 145 insertions(+), 5 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 180d1c8..7b17baa 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,31 @@ +2014-06-10 Kyrylo Tkachov + + * config/aarch64/aarch64-simd.md (aarch64_sqdmulh_lane): + New expander. + (aarch64_sqrdmulh_lane): Likewise. + (aarch64_sqdmulh_lane): Rename to... + (aarch64_sqdmulh_lane_internal): ...this. + (aarch64_sqdmulh_laneq): New expander. + (aarch64_sqrdmulh_laneq): Likewise. + (aarch64_sqdmulh_laneq): Rename to... + (aarch64_sqdmulh_laneq_internal): ...this. + (aarch64_sqdmulh_lane): New expander. + (aarch64_sqrdmulh_lane): Likewise. + (aarch64_sqdmulh_lane): Rename to... + (aarch64_sqdmulh_lane_internal): ...this. + (aarch64_sqdmlal_lane): Add lane flip for big-endian. + (aarch64_sqdmlal_laneq): Likewise. + (aarch64_sqdmlsl_lane): Likewise. + (aarch64_sqdmlsl_laneq): Likewise. + (aarch64_sqdmlal2_lane): Likewise. + (aarch64_sqdmlal2_laneq): Likewise. + (aarch64_sqdmlsl2_lane): Likewise. + (aarch64_sqdmlsl2_laneq): Likewise. + (aarch64_sqdmull_lane): Likewise. + (aarch64_sqdmull_laneq): Likewise. + (aarch64_sqdmull2_lane): Likewise. + (aarch64_sqdmull2_laneq): Likewise. + 2014-06-10 Richard Biener PR tree-optimization/61438 diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 8b3cfa1..8c840f5 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2650,7 +2650,41 @@ ;; sqdmulh_lane -(define_insn "aarch64_sqdmulh_lane" +(define_expand "aarch64_sqdmulh_lane" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_lane" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sqdmulh_lane_internal" [(set (match_operand:VDQHS 0 "register_operand" "=w") (unspec:VDQHS [(match_operand:VDQHS 1 "register_operand" "w") @@ -2666,7 +2700,41 @@ [(set_attr "type" "neon_sat_mul__scalar")] ) -(define_insn "aarch64_sqdmulh_laneq" +(define_expand "aarch64_sqdmulh_laneq" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_laneq_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_laneq" + [(match_operand:VDQHS 0 "register_operand" "") + (match_operand:VDQHS 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_laneq_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sqdmulh_laneq_internal" [(set (match_operand:VDQHS 0 "register_operand" "=w") (unspec:VDQHS [(match_operand:VDQHS 1 "register_operand" "w") @@ -2676,13 +2744,46 @@ VQDMULH))] "TARGET_SIMD" "* - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return \"sqdmulh\\t%0., %1., %2.[%3]\";" [(set_attr "type" "neon_sat_mul__scalar")] ) -(define_insn "aarch64_sqdmulh_lane" +(define_expand "aarch64_sqdmulh_lane" + [(match_operand:SD_HSI 0 "register_operand" "") + (match_operand:SD_HSI 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_expand "aarch64_sqrdmulh_lane" + [(match_operand:SD_HSI 0 "register_operand" "") + (match_operand:SD_HSI 1 "register_operand" "") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_SIMD" + { + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + emit_insn (gen_aarch64_sqrdmulh_lane_internal (operands[0], + operands[1], + operands[2], + operands[3])); + DONE; + } +) + +(define_insn "aarch64_sqdmulh_lane_internal" [(set (match_operand:SD_HSI 0 "register_operand" "=w") (unspec:SD_HSI [(match_operand:SD_HSI 1 "register_operand" "w") @@ -2692,7 +2793,6 @@ VQDMULH))] "TARGET_SIMD" "* - aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return \"sqdmulh\\t%0, %1, %2.[%3]\";" [(set_attr "type" "neon_sat_mul__scalar")] @@ -2774,6 +2874,7 @@ "TARGET_SIMD" { aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2789,6 +2890,7 @@ "TARGET_SIMD" { aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2804,6 +2906,7 @@ "TARGET_SIMD" { aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2819,6 +2922,7 @@ "TARGET_SIMD" { aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4])); @@ -2930,6 +3034,7 @@ { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal2_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -2946,6 +3051,7 @@ { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlal2_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -2962,6 +3068,7 @@ { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl2_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -2978,6 +3085,7 @@ { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); emit_insn (gen_aarch64_sqdmlsl2_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); @@ -3098,6 +3206,7 @@ "TARGET_SIMD" { aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode) / 2); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull_lane_internal (operands[0], operands[1], operands[2], operands[3])); DONE; @@ -3111,6 +3220,7 @@ "TARGET_SIMD" { aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull_lane_internal (operands[0], operands[1], operands[2], operands[3])); DONE; @@ -3203,6 +3313,7 @@ { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode) / 2); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull2_lane_internal (operands[0], operands[1], operands[2], operands[3], p)); @@ -3218,6 +3329,7 @@ { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); emit_insn (gen_aarch64_sqdmull2_lane_internal (operands[0], operands[1], operands[2], operands[3], p)); -- 2.7.4