From f88fbcb9038a17c788b10653842a99e1e3227d24 Mon Sep 17 00:00:00 2001 From: meissner Date: Mon, 10 Jun 2013 21:42:14 +0000 Subject: [PATCH] [gcc] 2013-06-10 Michael Meissner Pat Haugen Peter Bergner * config/rs6000/vector.md (GPR move splitter): Do not split moves of vectors in GPRS if they are direct moves or quad word load or store moves. * config/rs6000/rs6000-protos.h (rs6000_output_move_128bit): Add declaration. (direct_move_p): Likewise. (quad_load_store_p): Likewise. * config/rs6000/rs6000.c (enum rs6000_reg_type): Simplify register classes into bins based on the physical register type. (reg_class_to_reg_type): Likewise. (IS_STD_REG_TYPE): Likewise. (IS_FP_VECT_REG_TYPE): Likewise. (reload_fpr_gpr): Arrays to determine what insn to use if we can use direct move instructions. (reload_gpr_vsx): Likewise. (reload_vsx_gpr): Likewise. (rs6000_init_hard_regno_mode_ok): Precalculate the register type information that is a simplification of register classes. Also precalculate direct move reload helpers. (direct_move_p): New function to return true if the operation can be done as a direct move instruciton. (quad_load_store_p): New function to return true if the operation is a quad memory operation. (rs6000_legitimize_address): If quad memory, only allow register indirect for TImode addresses. (rs6000_legitimate_address_p): Likewise. (enum reload_reg_type): Delete, replace with rs6000_reg_type. (rs6000_reload_register_type): Likewise. (register_to_reg_type): Return register type. (rs6000_secondary_reload_simple_move): New helper function for secondary reload and secondary memory needed to identify anything that is a simple move, and does not need reloading. (rs6000_secondary_reload_direct_move): New helper function for secondary reload to identify cases that can be done with several instructions via the direct move instructions. (rs6000_secondary_reload_move): New helper function for secondary reload to identify moves between register types that can be done. 
(rs6000_secondary_reload): Add support for quad memory operations and for direct move. (rs6000_secondary_memory_needed): Likewise. (rs6000_debug_secondary_memory_needed): Change argument names. (rs6000_output_move_128bit): New function to return the move to use for 128-bit moves, including knowing about the various limitations of quad memory operations. * config/rs6000/vsx.md (vsx_mov): Add support for quad memory operations. call rs6000_output_move_128bit for the actual instruciton(s) to generate. (vsx_movti_64bit): Likewise. * config/rs6000/rs6000.md (UNSPEC_P8V_FMRGOW): New unspec values. (UNSPEC_P8V_MTVSRWZ): Likewise. (UNSPEC_P8V_RELOAD_FROM_GPR): Likewise. (UNSPEC_P8V_MTVSRD): Likewise. (UNSPEC_P8V_XXPERMDI): Likewise. (UNSPEC_P8V_RELOAD_FROM_VSX): Likewise. (UNSPEC_FUSION_GPR): Likewise. (FMOVE128_GPR): New iterator for direct move. (f32_lv): New mode attribute for load/store of SFmode/SDmode values. (f32_sv): Likewise. (f32_dm): Likewise. (zero_extenddi2_internal1): Add support for power8 32-bit loads and direct move instructions. (zero_extendsidi2_lfiwzx): Likewise. (extendsidi2_lfiwax): Likewise. (extendsidi2_nocell): Likewise. (floatsi2_lfiwax): Likewise. (lfiwax): Likewise. (floatunssi2_lfiwzx): Likewise. (lfiwzx): Likewise. (fix_trunc_stfiwx): Likewise. (fixuns_trunc_stfiwx): Likewise. (mov_hardfloat, 32-bit floating point): Likewise. (mov_hardfloat64, 64-bit floating point): Likewise. (parity2_cmpb): Set length/type attr. (unnamed shift right patterns, mov_internal2): Change type attr for 'mr.' to fast_compare. (bpermd_): Change type attr to popcnt. (p8_fmrgow_): New insns for power8 direct move support. (p8_mtvsrwz_1): Likewise. (p8_mtvsrwz_2): Likewise. (reload_fpr_from_gpr): Likewise. (p8_mtvsrd_1): Likewise. (p8_mtvsrd_2): Likewise. (p8_xxpermdi_): Likewise. (reload_vsx_from_gpr): Likewise. (reload_vsx_from_gprsf): Likewise. (p8_mfvsrd_3_): LIkewise. (reload_gpr_from_vsx): Likewise. (reload_gpr_from_vsxsf): Likewise. (p8_mfvsrd_4_disf): Likewise. 
(multi-word GPR splits): Do not split direct moves or quad memory operations. [gcc/testsuite] 2013-06-10 Michael Meissner Pat Haugen Peter Bergner * gcc.target/powerpc/direct-move-vint1.c: New tests for power8 direct move instructions. * gcc.target/powerpc/direct-move-vint2.c: Likewise. * gcc.target/powerpc/direct-move.h: Likewise. * gcc.target/powerpc/direct-move-float1.c: Likewise. * gcc.target/powerpc/direct-move-float2.c: Likewise. * gcc.target/powerpc/direct-move-double1.c: Likewise. * gcc.target/powerpc/direct-move-double2.c: Likewise. * gcc.target/powerpc/direct-move-long1.c: Likewise. * gcc.target/powerpc/direct-move-long2.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@199918 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 101 +++ gcc/config/rs6000/rs6000-protos.h | 3 + gcc/config/rs6000/rs6000.c | 718 ++++++++++++++++++--- gcc/config/rs6000/rs6000.md | 392 +++++++++-- gcc/config/rs6000/vector.md | 4 +- gcc/config/rs6000/vsx.md | 101 +-- gcc/testsuite/ChangeLog | 15 + .../gcc.target/powerpc/direct-move-double1.c | 15 + .../gcc.target/powerpc/direct-move-double2.c | 14 + .../gcc.target/powerpc/direct-move-float1.c | 17 + .../gcc.target/powerpc/direct-move-float2.c | 14 + .../gcc.target/powerpc/direct-move-long1.c | 15 + .../gcc.target/powerpc/direct-move-long2.c | 14 + .../gcc.target/powerpc/direct-move-vint1.c | 13 + .../gcc.target/powerpc/direct-move-vint2.c | 12 + gcc/testsuite/gcc.target/powerpc/direct-move.h | 183 ++++++ 16 files changed, 1424 insertions(+), 207 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-double1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-double2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-float1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-float2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-long1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-long2.c create mode 100644 
gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4c02221..77bd5e6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,104 @@ +2013-06-10 Michael Meissner + Pat Haugen + Peter Bergner + + * config/rs6000/vector.md (GPR move splitter): Do not split moves + of vectors in GPRS if they are direct moves or quad word load or + store moves. + + * config/rs6000/rs6000-protos.h (rs6000_output_move_128bit): Add + declaration. + (direct_move_p): Likewise. + (quad_load_store_p): Likewise. + + * config/rs6000/rs6000.c (enum rs6000_reg_type): Simplify register + classes into bins based on the physical register type. + (reg_class_to_reg_type): Likewise. + (IS_STD_REG_TYPE): Likewise. + (IS_FP_VECT_REG_TYPE): Likewise. + (reload_fpr_gpr): Arrays to determine what insn to use if we can + use direct move instructions. + (reload_gpr_vsx): Likewise. + (reload_vsx_gpr): Likewise. + (rs6000_init_hard_regno_mode_ok): Precalculate the register type + information that is a simplification of register classes. Also + precalculate direct move reload helpers. + (direct_move_p): New function to return true if the operation can + be done as a direct move instruciton. + (quad_load_store_p): New function to return true if the operation + is a quad memory operation. + (rs6000_legitimize_address): If quad memory, only allow register + indirect for TImode addresses. + (rs6000_legitimate_address_p): Likewise. + (enum reload_reg_type): Delete, replace with rs6000_reg_type. + (rs6000_reload_register_type): Likewise. + (register_to_reg_type): Return register type. + (rs6000_secondary_reload_simple_move): New helper function for + secondary reload and secondary memory needed to identify anything + that is a simple move, and does not need reloading. 
+ (rs6000_secondary_reload_direct_move): New helper function for + secondary reload to identify cases that can be done with several + instructions via the direct move instructions. + (rs6000_secondary_reload_move): New helper function for secondary + reload to identify moves between register types that can be done. + (rs6000_secondary_reload): Add support for quad memory operations + and for direct move. + (rs6000_secondary_memory_needed): Likewise. + (rs6000_debug_secondary_memory_needed): Change argument names. + (rs6000_output_move_128bit): New function to return the move to + use for 128-bit moves, including knowing about the various + limitations of quad memory operations. + + * config/rs6000/vsx.md (vsx_mov): Add support for quad + memory operations. call rs6000_output_move_128bit for the actual + instruciton(s) to generate. + (vsx_movti_64bit): Likewise. + + * config/rs6000/rs6000.md (UNSPEC_P8V_FMRGOW): New unspec values. + (UNSPEC_P8V_MTVSRWZ): Likewise. + (UNSPEC_P8V_RELOAD_FROM_GPR): Likewise. + (UNSPEC_P8V_MTVSRD): Likewise. + (UNSPEC_P8V_XXPERMDI): Likewise. + (UNSPEC_P8V_RELOAD_FROM_VSX): Likewise. + (UNSPEC_FUSION_GPR): Likewise. + (FMOVE128_GPR): New iterator for direct move. + (f32_lv): New mode attribute for load/store of SFmode/SDmode + values. + (f32_sv): Likewise. + (f32_dm): Likewise. + (zero_extenddi2_internal1): Add support for power8 32-bit + loads and direct move instructions. + (zero_extendsidi2_lfiwzx): Likewise. + (extendsidi2_lfiwax): Likewise. + (extendsidi2_nocell): Likewise. + (floatsi2_lfiwax): Likewise. + (lfiwax): Likewise. + (floatunssi2_lfiwzx): Likewise. + (lfiwzx): Likewise. + (fix_trunc_stfiwx): Likewise. + (fixuns_trunc_stfiwx): Likewise. + (mov_hardfloat, 32-bit floating point): Likewise. + (mov_hardfloat64, 64-bit floating point): Likewise. + (parity2_cmpb): Set length/type attr. + (unnamed shift right patterns, mov_internal2): Change type attr + for 'mr.' to fast_compare. + (bpermd_): Change type attr to popcnt. 
+ (p8_fmrgow_): New insns for power8 direct move support. + (p8_mtvsrwz_1): Likewise. + (p8_mtvsrwz_2): Likewise. + (reload_fpr_from_gpr): Likewise. + (p8_mtvsrd_1): Likewise. + (p8_mtvsrd_2): Likewise. + (p8_xxpermdi_): Likewise. + (reload_vsx_from_gpr): Likewise. + (reload_vsx_from_gprsf): Likewise. + (p8_mfvsrd_3_): LIkewise. + (reload_gpr_from_vsx): Likewise. + (reload_gpr_from_vsxsf): Likewise. + (p8_mfvsrd_4_disf): Likewise. + (multi-word GPR splits): Do not split direct moves or quad memory + operations. + 2013-06-10 David Malcolm * tree-into-ssa.c (interesting_blocks): Make static. diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index d9bcf1a..02836ec 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -50,6 +50,7 @@ extern rtx rs6000_got_register (rtx); extern rtx find_addr_reg (rtx); extern rtx gen_easy_altivec_constant (rtx); extern const char *output_vec_const_move (rtx *); +extern const char *rs6000_output_move_128bit (rtx *); extern void rs6000_expand_vector_init (rtx, rtx); extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); @@ -70,6 +71,8 @@ extern int insvdi_rshift_rlwimi_p (rtx, rtx, rtx); extern int registers_ok_for_quad_peep (rtx, rtx); extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); +extern bool direct_move_p (rtx, rtx); +extern bool quad_load_store_p (rtx, rtx); extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class); extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index df6c69e..3646c6d 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -292,6 +292,39 @@ typedef rtx (*gen_2arg_fn_t) (rtx, rtx, rtx); don't link in rs6000-c.c, so we can't call it directly. 
*/ void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT); +/* Simplify register classes into simpler classifications. We assume + GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range + check for standard register classes (gpr/floating/altivec/vsx) and + floating/vector classes (float/altivec/vsx). */ + +enum rs6000_reg_type { + NO_REG_TYPE, + PSEUDO_REG_TYPE, + GPR_REG_TYPE, + VSX_REG_TYPE, + ALTIVEC_REG_TYPE, + FPR_REG_TYPE, + SPR_REG_TYPE, + CR_REG_TYPE, + SPE_ACC_TYPE, + SPEFSCR_REG_TYPE +}; + +/* Map register class to register type. */ +static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES]; + +/* First/last register type for the 'normal' register types (i.e. general + purpose, floating point, altivec, and VSX registers). */ +#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE) + +#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE) + +/* Direct moves to/from vsx/gpr registers that need an additional register to + do the move. */ +static enum insn_code reload_fpr_gpr[NUM_MACHINE_MODES]; +static enum insn_code reload_gpr_vsx[NUM_MACHINE_MODES]; +static enum insn_code reload_vsx_gpr[NUM_MACHINE_MODES]; + /* Target cpu costs. */ @@ -1042,6 +1075,13 @@ static void rs6000_print_isa_options (FILE *, int, const char *, static void rs6000_print_builtin_options (FILE *, int, const char *, HOST_WIDE_INT); +static enum rs6000_reg_type register_to_reg_type (rtx, bool *); +static bool rs6000_secondary_reload_move (enum rs6000_reg_type, + enum rs6000_reg_type, + enum machine_mode, + secondary_reload_info *, + bool); + /* Hash table stuff for keeping track of TOC entries. */ struct GTY(()) toc_hash_struct @@ -1587,8 +1627,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) return ALTIVEC_REGNO_P (last_regno); } - /* Allow TImode in all VSX registers if the user asked for it. Note, PTImode - can only go in GPRs. 
*/ + /* Allow TImode in all VSX registers if the user asked for it. */ if (mode == TImode && TARGET_VSX_TIMODE && VSX_REGNO_P (regno)) return 1; @@ -2154,6 +2193,36 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS; rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS; + /* Precalculate register class to simpler reload register class. We don't + need all of the register classes that are combinations of different + classes, just the simple ones that have constraint letters. */ + for (c = 0; c < N_REG_CLASSES; c++) + reg_class_to_reg_type[c] = NO_REG_TYPE; + + reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE; + reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE; + reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE; + reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE; + reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE; + + if (TARGET_VSX) + { + reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE; + } + else + { + reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE; + reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE; + } + /* Precalculate vector information, this must be set up before the rs6000_hard_regno_nregs_internal below. */ for (m = 0; m < NUM_MACHINE_MODES; ++m) @@ -2305,7 +2374,15 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_LFIWZX) rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; - /* Set up the reload helper functions. */ + /* Setup the direct move combinations. 
*/ + for (m = 0; m < NUM_MACHINE_MODES; ++m) + { + reload_fpr_gpr[m] = CODE_FOR_nothing; + reload_gpr_vsx[m] = CODE_FOR_nothing; + reload_vsx_gpr[m] = CODE_FOR_nothing; + } + + /* Set up the reload helper and direct move functions. */ if (TARGET_VSX || TARGET_ALTIVEC) { if (TARGET_64BIT) @@ -2329,11 +2406,47 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_di_store; rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_di_load; } + if (TARGET_P8_VECTOR) + { + rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_di_store; + rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_di_load; + rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_di_store; + rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_di_load; + } if (TARGET_VSX_TIMODE) { rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_di_store; rs6000_vector_reload[TImode][1] = CODE_FOR_reload_ti_di_load; } + if (TARGET_DIRECT_MOVE) + { + if (TARGET_POWERPC64) + { + reload_gpr_vsx[TImode] = CODE_FOR_reload_gpr_from_vsxti; + reload_gpr_vsx[V2DFmode] = CODE_FOR_reload_gpr_from_vsxv2df; + reload_gpr_vsx[V2DImode] = CODE_FOR_reload_gpr_from_vsxv2di; + reload_gpr_vsx[V4SFmode] = CODE_FOR_reload_gpr_from_vsxv4sf; + reload_gpr_vsx[V4SImode] = CODE_FOR_reload_gpr_from_vsxv4si; + reload_gpr_vsx[V8HImode] = CODE_FOR_reload_gpr_from_vsxv8hi; + reload_gpr_vsx[V16QImode] = CODE_FOR_reload_gpr_from_vsxv16qi; + reload_gpr_vsx[SFmode] = CODE_FOR_reload_gpr_from_vsxsf; + + reload_vsx_gpr[TImode] = CODE_FOR_reload_vsx_from_gprti; + reload_vsx_gpr[V2DFmode] = CODE_FOR_reload_vsx_from_gprv2df; + reload_vsx_gpr[V2DImode] = CODE_FOR_reload_vsx_from_gprv2di; + reload_vsx_gpr[V4SFmode] = CODE_FOR_reload_vsx_from_gprv4sf; + reload_vsx_gpr[V4SImode] = CODE_FOR_reload_vsx_from_gprv4si; + reload_vsx_gpr[V8HImode] = CODE_FOR_reload_vsx_from_gprv8hi; + reload_vsx_gpr[V16QImode] = CODE_FOR_reload_vsx_from_gprv16qi; + reload_vsx_gpr[SFmode] = CODE_FOR_reload_vsx_from_gprsf; + } + 
else + { + reload_fpr_gpr[DImode] = CODE_FOR_reload_fpr_from_gprdi; + reload_fpr_gpr[DDmode] = CODE_FOR_reload_fpr_from_gprdd; + reload_fpr_gpr[DFmode] = CODE_FOR_reload_fpr_from_gprdf; + } + } } else { @@ -2356,6 +2469,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_si_store; rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_si_load; } + if (TARGET_P8_VECTOR) + { + rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_si_store; + rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_si_load; + rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_si_store; + rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_si_load; + } if (TARGET_VSX_TIMODE) { rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_si_store; @@ -5385,6 +5505,72 @@ gpr_or_gpr_p (rtx op0, rtx op1) || (REG_P (op1) && INT_REGNO_P (REGNO (op1)))); } +/* Return true if this is a move direct operation between GPR registers and + floating point/VSX registers. */ + +bool +direct_move_p (rtx op0, rtx op1) +{ + int regno0, regno1; + + if (!REG_P (op0) || !REG_P (op1)) + return false; + + if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR) + return false; + + regno0 = REGNO (op0); + regno1 = REGNO (op1); + if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER) + return false; + + if (INT_REGNO_P (regno0)) + return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1); + + else if (INT_REGNO_P (regno1)) + { + if (TARGET_MFPGPR && FP_REGNO_P (regno0)) + return true; + + else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0)) + return true; + } + + return false; +} + +/* Return true if this is a load or store quad operation. 
*/ + +bool +quad_load_store_p (rtx op0, rtx op1) +{ + bool ret; + + if (!TARGET_QUAD_MEMORY) + ret = false; + + else if (REG_P (op0) && MEM_P (op1)) + ret = (quad_int_reg_operand (op0, GET_MODE (op0)) + && quad_memory_operand (op1, GET_MODE (op1)) + && !reg_overlap_mentioned_p (op0, op1)); + + else if (MEM_P (op0) && REG_P (op1)) + ret = (quad_memory_operand (op0, GET_MODE (op0)) + && quad_int_reg_operand (op1, GET_MODE (op1))); + + else + ret = false; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\n========== quad_load_store, return %s\n", + ret ? "true" : "false"); + debug_rtx (gen_rtx_SET (VOIDmode, op0, op1)); + } + + return ret; +} + /* Given an address, return a constant offset term if one exists. */ static rtx @@ -5903,8 +6089,11 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx) return force_reg (Pmode, XEXP (x, 0)); + /* For TImode with load/store quad, restrict addresses to just a single + pointer, so it works with both GPRs and VSX registers. */ /* Make sure both operands are registers. */ - else if (GET_CODE (x) == PLUS) + else if (GET_CODE (x) == PLUS + && (mode != TImode || !TARGET_QUAD_MEMORY)) return gen_rtx_PLUS (Pmode, force_reg (Pmode, XEXP (x, 0)), force_reg (Pmode, XEXP (x, 1))); @@ -6858,6 +7047,13 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict) if (reg_offset_p && legitimate_constant_pool_address_p (x, mode, reg_ok_strict)) return 1; + /* For TImode, if we have load/store quad, only allow register indirect + addresses. This will allow the values to go in either GPRs or VSX + registers without reloading. The vector types would tend to go into VSX + registers, so we allow REG+REG, while TImode seems somewhat split, in that + some uses are GPR based, and some VSX based. */ + if (mode == TImode && TARGET_QUAD_MEMORY) + return 0; /* If not REG_OK_STRICT (before reload) let pass any stack offset. */ if (! 
reg_ok_strict && reg_offset_p @@ -14001,29 +14197,226 @@ rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) return NULL_TREE; } -enum reload_reg_type { - GPR_REGISTER_TYPE, - VECTOR_REGISTER_TYPE, - OTHER_REGISTER_TYPE -}; +/* Classify a register type. Because the FMRGOW/FMRGEW instructions only work + on traditional floating point registers, and the VMRGOW/VMRGEW instructions + only work on the traditional altivec registers, note if an altivec register + was chosen. */ -static enum reload_reg_type -rs6000_reload_register_type (enum reg_class rclass) +static enum rs6000_reg_type +register_to_reg_type (rtx reg, bool *is_altivec) { - switch (rclass) + HOST_WIDE_INT regno; + enum reg_class rclass; + + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + + if (!REG_P (reg)) + return NO_REG_TYPE; + + regno = REGNO (reg); + if (regno >= FIRST_PSEUDO_REGISTER) { - case GENERAL_REGS: - case BASE_REGS: - return GPR_REGISTER_TYPE; + if (!lra_in_progress && !reload_in_progress && !reload_completed) + return PSEUDO_REG_TYPE; - case FLOAT_REGS: - case ALTIVEC_REGS: - case VSX_REGS: - return VECTOR_REGISTER_TYPE; + regno = true_regnum (reg); + if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER) + return PSEUDO_REG_TYPE; + } - default: - return OTHER_REGISTER_TYPE; + gcc_assert (regno >= 0); + + if (is_altivec && ALTIVEC_REGNO_P (regno)) + *is_altivec = true; + + rclass = rs6000_regno_regclass[regno]; + return reg_class_to_reg_type[(int)rclass]; +} + +/* Helper function for rs6000_secondary_reload to return true if a move to a + different register class is really a simple move. */ + +static bool +rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + enum machine_mode mode) +{ + int size; + + /* Add support for various direct moves available. In this function, we only + look at cases where we don't need any extra registers, and one or more + simple move insns are issued. 
At present, 32-bit integers are not allowed + in FPR/VSX registers. Single precision binary floating is not a simple + move because we need to convert to the single precision memory layout. + The 4-byte SDmode can be moved. */ + size = GET_MODE_SIZE (mode); + if (TARGET_DIRECT_MOVE + && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8)) + && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8 + && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE) + || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + else if ((size == 4 || (TARGET_POWERPC64 && size == 8)) + && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE) + || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE))) + return true; + + return false; +} + +/* Power8 helper function for rs6000_secondary_reload, handle all of the + special direct moves that involve allocating an extra register, return the + insn code of the helper function if there is such a function or + CODE_FOR_nothing if not. */ + +static bool +rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + enum machine_mode mode, + secondary_reload_info *sri, + bool altivec_p) +{ + bool ret = false; + enum insn_code icode = CODE_FOR_nothing; + int cost = 0; + int size = GET_MODE_SIZE (mode); + + if (TARGET_POWERPC64) + { + if (size == 16) + { + /* Handle moving 128-bit values from GPRs to VSX point registers on + power8 when running in 64-bit mode using XXPERMDI to glue the two + 64-bit values back together. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ + icode = reload_vsx_gpr[(int)mode]; + } + + /* Handle moving 128-bit values from VSX point registers to GPRs on + power8 when running in 64-bit mode using XXPERMDI to get access to the + bottom 64-bit value. 
*/ + else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */ + icode = reload_gpr_vsx[(int)mode]; + } + } + + else if (mode == SFmode) + { + if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* xscvdpspn, mfvsrd, and. */ + icode = reload_gpr_vsx[(int)mode]; + } + + else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 2; /* mtvsrz, xscvspdpn. */ + icode = reload_vsx_gpr[(int)mode]; + } + } + } + + if (TARGET_POWERPC64 && size == 16) + { + /* Handle moving 128-bit values from GPRs to VSX point registers on + power8 when running in 64-bit mode using XXPERMDI to glue the two + 64-bit values back together. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ + icode = reload_vsx_gpr[(int)mode]; + } + + /* Handle moving 128-bit values from VSX point registers to GPRs on + power8 when running in 64-bit mode using XXPERMDI to get access to the + bottom 64-bit value. */ + else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */ + icode = reload_gpr_vsx[(int)mode]; + } + } + + else if (!TARGET_POWERPC64 && size == 8) + { + /* Handle moving 64-bit values from GPRs to floating point registers on + power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit + values back together. Altivec register classes must be handled + specially since a different instruction is used, and the secondary + reload support requires a single instruction class in the scratch + register constraint. However, right now TFmode is not allowed in + Altivec registers, so the pattern will never match. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p) + { + cost = 3; /* 2 mtvsrwz's, 1 fmrgow. 
*/ + icode = reload_fpr_gpr[(int)mode]; + } } + + if (icode != CODE_FOR_nothing) + { + ret = true; + if (sri) + { + sri->icode = icode; + sri->extra_cost = cost; + } + } + + return ret; +} + +/* Return whether a move between two register classes can be done either + directly (simple move) or via a pattern that uses a single extra temporary + (using power8's direct move in this case). */ + +static bool +rs6000_secondary_reload_move (enum rs6000_reg_type to_type, + enum rs6000_reg_type from_type, + enum machine_mode mode, + secondary_reload_info *sri, + bool altivec_p) +{ + /* Fall back to load/store reloads if either type is not a register. */ + if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE) + return false; + + /* If we haven't allocated registers yet, assume the move can be done for the + standard register types. */ + if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE) + || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type)) + || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type))) + return true; + + /* Moves to the same set of registers is a simple move for non-specialized + registers. */ + if (to_type == from_type && IS_STD_REG_TYPE (to_type)) + return true; + + /* Check whether a simple move can be done directly. */ + if (rs6000_secondary_reload_simple_move (to_type, from_type, mode)) + { + if (sri) + { + sri->icode = CODE_FOR_nothing; + sri->extra_cost = 0; + } + return true; + } + + /* Now check if we can do it in a few steps. */ + return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri, + altivec_p); } /* Inform reload about cases where moving X with a mode MODE to a register in @@ -14049,11 +14442,32 @@ rs6000_secondary_reload (bool in_p, bool default_p = false; sri->icode = CODE_FOR_nothing; - - /* Convert vector loads and stores into gprs to use an additional base - register. 
*/ icode = rs6000_vector_reload[mode][in_p != false]; - if (icode != CODE_FOR_nothing) + + if (REG_P (x) || register_operand (x, mode)) + { + enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass]; + bool altivec_p = (rclass == ALTIVEC_REGS); + enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p); + + if (!in_p) + { + enum rs6000_reg_type exchange = to_type; + to_type = from_type; + from_type = exchange; + } + + if (rs6000_secondary_reload_move (to_type, from_type, mode, sri, + altivec_p)) + { + icode = (enum insn_code)sri->icode; + default_p = false; + ret = NO_REGS; + } + } + + /* Handle vector moves with reload helper functions. */ + if (ret == ALL_REGS && icode != CODE_FOR_nothing) { ret = NO_REGS; sri->icode = CODE_FOR_nothing; @@ -14065,12 +14479,21 @@ rs6000_secondary_reload (bool in_p, /* Loads to and stores from gprs can do reg+offset, and wouldn't need an extra register in that case, but it would need an extra - register if the addressing is reg+reg or (reg+reg)&(-16). */ + register if the addressing is reg+reg or (reg+reg)&(-16). Special + case load/store quad. 
*/ if (rclass == GENERAL_REGS || rclass == BASE_REGS) { - if (!legitimate_indirect_address_p (addr, false) - && !rs6000_legitimate_offset_address_p (PTImode, addr, - false, true)) + if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY + && GET_MODE_SIZE (mode) == 16 + && quad_memory_operand (x, mode)) + { + sri->icode = icode; + sri->extra_cost = 2; + } + + else if (!legitimate_indirect_address_p (addr, false) + && !rs6000_legitimate_offset_address_p (PTImode, addr, + false, true)) { sri->icode = icode; /* account for splitting the loads, and converting the @@ -14084,7 +14507,7 @@ rs6000_secondary_reload (bool in_p, else if ((rclass == FLOAT_REGS || rclass == NO_REGS) && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) && (legitimate_indirect_address_p (addr, false) - || legitimate_indirect_address_p (XEXP (addr, 0), false) + || legitimate_indirect_address_p (addr, false) || rs6000_legitimate_offset_address_p (mode, addr, false, true))) @@ -14136,12 +14559,12 @@ rs6000_secondary_reload (bool in_p, else { enum reg_class xclass = REGNO_REG_CLASS (regno); - enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass); - enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass); + enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass]; + enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass]; /* If memory is needed, use default_secondary_reload to create the stack slot. 
*/ - if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE) + if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1)) default_p = true; else ret = NO_REGS; @@ -14151,7 +14574,7 @@ rs6000_secondary_reload (bool in_p, default_p = true; } else if (TARGET_POWERPC64 - && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE + && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE && MEM_P (x) && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD) { @@ -14190,7 +14613,7 @@ rs6000_secondary_reload (bool in_p, default_p = true; } else if (!TARGET_POWERPC64 - && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE + && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE && MEM_P (x) && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) { @@ -14753,42 +15176,25 @@ rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass) set and vice versa. */ static bool -rs6000_secondary_memory_needed (enum reg_class class1, - enum reg_class class2, +rs6000_secondary_memory_needed (enum reg_class from_class, + enum reg_class to_class, enum machine_mode mode) { - if (class1 == class2) - return false; - - /* Under VSX, there are 3 register classes that values could be in (VSX_REGS, - ALTIVEC_REGS, and FLOAT_REGS). We don't need to use memory to copy - between these classes. But we need memory for other things that can go in - FLOAT_REGS like SFmode. 
*/ - if (TARGET_VSX - && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode)) - && (class1 == VSX_REGS || class1 == ALTIVEC_REGS - || class1 == FLOAT_REGS)) - return (class2 != VSX_REGS && class2 != ALTIVEC_REGS - && class2 != FLOAT_REGS); - - if (class1 == VSX_REGS || class2 == VSX_REGS) - return true; + enum rs6000_reg_type from_type, to_type; + bool altivec_p = ((from_class == ALTIVEC_REGS) + || (to_class == ALTIVEC_REGS)); - if (class1 == FLOAT_REGS - && (!TARGET_MFPGPR || !TARGET_POWERPC64 - || ((mode != DFmode) - && (mode != DDmode) - && (mode != DImode)))) - return true; + /* If a simple/direct move is available, we don't need secondary memory */ + from_type = reg_class_to_reg_type[(int)from_class]; + to_type = reg_class_to_reg_type[(int)to_class]; - if (class2 == FLOAT_REGS - && (!TARGET_MFPGPR || !TARGET_POWERPC64 - || ((mode != DFmode) - && (mode != DDmode) - && (mode != DImode)))) - return true; + if (rs6000_secondary_reload_move (to_type, from_type, mode, + (secondary_reload_info *)0, altivec_p)) + return false; - if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS) + /* If we have a floating point or vector register class, we need to use + memory to transfer the data. */ + if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type)) return true; return false; @@ -14796,17 +15202,19 @@ rs6000_secondary_memory_needed (enum reg_class class1, /* Debug version of rs6000_secondary_memory_needed. */ static bool -rs6000_debug_secondary_memory_needed (enum reg_class class1, - enum reg_class class2, +rs6000_debug_secondary_memory_needed (enum reg_class from_class, + enum reg_class to_class, enum machine_mode mode) { - bool ret = rs6000_secondary_memory_needed (class1, class2, mode); + bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode); fprintf (stderr, - "rs6000_secondary_memory_needed, return: %s, class1 = %s, " - "class2 = %s, mode = %s\n", - ret ? 
"true" : "false", reg_class_names[class1], - reg_class_names[class2], GET_MODE_NAME (mode)); + "rs6000_secondary_memory_needed, return: %s, from_class = %s, " + "to_class = %s, mode = %s\n", + ret ? "true" : "false", + reg_class_names[from_class], + reg_class_names[to_class], + GET_MODE_NAME (mode)); return ret; } @@ -15012,6 +15420,170 @@ rs6000_debug_cannot_change_mode_class (enum machine_mode from, return ret; } +/* Return a string to do a move operation of 128 bits of data. */ + +const char * +rs6000_output_move_128bit (rtx operands[]) +{ + rtx dest = operands[0]; + rtx src = operands[1]; + enum machine_mode mode = GET_MODE (dest); + int dest_regno; + int src_regno; + bool dest_gpr_p, dest_fp_p, dest_av_p, dest_vsx_p; + bool src_gpr_p, src_fp_p, src_av_p, src_vsx_p; + + if (REG_P (dest)) + { + dest_regno = REGNO (dest); + dest_gpr_p = INT_REGNO_P (dest_regno); + dest_fp_p = FP_REGNO_P (dest_regno); + dest_av_p = ALTIVEC_REGNO_P (dest_regno); + dest_vsx_p = dest_fp_p | dest_av_p; + } + else + { + dest_regno = -1; + dest_gpr_p = dest_fp_p = dest_av_p = dest_vsx_p = false; + } + + if (REG_P (src)) + { + src_regno = REGNO (src); + src_gpr_p = INT_REGNO_P (src_regno); + src_fp_p = FP_REGNO_P (src_regno); + src_av_p = ALTIVEC_REGNO_P (src_regno); + src_vsx_p = src_fp_p | src_av_p; + } + else + { + src_regno = -1; + src_gpr_p = src_fp_p = src_av_p = src_vsx_p = false; + } + + /* Register moves. */ + if (dest_regno >= 0 && src_regno >= 0) + { + if (dest_gpr_p) + { + if (src_gpr_p) + return "#"; + + else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p) + return "#"; + } + + else if (TARGET_VSX && dest_vsx_p) + { + if (src_vsx_p) + return "xxlor %x0,%x1,%x1"; + + else if (TARGET_DIRECT_MOVE && src_gpr_p) + return "#"; + } + + else if (TARGET_ALTIVEC && dest_av_p && src_av_p) + return "vor %0,%1,%1"; + + else if (dest_fp_p && src_fp_p) + return "#"; + } + + /* Loads. 
*/ + else if (dest_regno >= 0 && MEM_P (src)) + { + if (dest_gpr_p) + { + if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0 + && quad_memory_operand (src, mode) + && !reg_overlap_mentioned_p (dest, src)) + { + /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ + return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1"; + } + else + return "#"; + } + + else if (TARGET_ALTIVEC && dest_av_p + && altivec_indexed_or_indirect_operand (src, mode)) + return "lvx %0,%y1"; + + else if (TARGET_VSX && dest_vsx_p) + { + if (mode == V16QImode || mode == V8HImode || mode == V4SImode) + return "lxvw4x %x0,%y1"; + else + return "lxvd2x %x0,%y1"; + } + + else if (TARGET_ALTIVEC && dest_av_p) + return "lvx %0,%y1"; + + else if (dest_fp_p) + return "#"; + } + + /* Stores. */ + else if (src_regno >= 0 && MEM_P (dest)) + { + if (src_gpr_p) + { + if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0 + && quad_memory_operand (dest, mode)) + { + /* lq/stq only has DQ-form, so avoid X-form that %y produces. */ + return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0"; + } + else + return "#"; + } + + else if (TARGET_ALTIVEC && src_av_p + && altivec_indexed_or_indirect_operand (src, mode)) + return "stvx %1,%y0"; + + else if (TARGET_VSX && src_vsx_p) + { + if (mode == V16QImode || mode == V8HImode || mode == V4SImode) + return "stxvw4x %x1,%y0"; + else + return "stxvd2x %x1,%y0"; + } + + else if (TARGET_ALTIVEC && src_av_p) + return "stvx %1,%y0"; + + else if (src_fp_p) + return "#"; + } + + /* Constants. 
*/ + else if (dest_regno >= 0 + && (GET_CODE (src) == CONST_INT + || GET_CODE (src) == CONST_DOUBLE + || GET_CODE (src) == CONST_VECTOR)) + { + if (dest_gpr_p) + return "#"; + + else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode)) + return "xxlxor %x0,%x0,%x0"; + + else if (TARGET_ALTIVEC && dest_av_p) + return output_vec_const_move (operands); + } + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\n===== Bad 128 bit move:\n"); + debug_rtx (gen_rtx_SET (VOIDmode, dest, src)); + } + + gcc_unreachable (); +} + + /* Given a comparison operation, return the bit number in CCR to test. We know this is a valid comparison. diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 478831e..93aa039 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -127,6 +127,13 @@ UNSPEC_LFIWZX UNSPEC_FCTIWUZ UNSPEC_GRP_END_NOP + UNSPEC_P8V_FMRGOW + UNSPEC_P8V_MTVSRWZ + UNSPEC_P8V_RELOAD_FROM_GPR + UNSPEC_P8V_MTVSRD + UNSPEC_P8V_XXPERMDI + UNSPEC_P8V_RELOAD_FROM_VSX + UNSPEC_FUSION_GPR ]) ;; @@ -273,6 +280,15 @@ (define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128") (TD "TARGET_HARD_FLOAT && TARGET_FPRS")]) +; Iterators for 128 bit types for direct move +(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE") + (V16QI "") + (V8HI "") + (V4SI "") + (V4SF "") + (V2DI "") + (V2DF "")]) + ; Whether a floating point move is ok, don't allow SD without hardware FP (define_mode_attr fmove_ok [(SF "") (DF "") @@ -289,11 +305,16 @@ (define_mode_attr f32_lr [(SF "f") (SD "wz")]) (define_mode_attr f32_lm [(SF "m") (SD "Z")]) (define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")]) +(define_mode_attr f32_lv [(SF "lxsspx %0,%y1") (SD "lxsiwzx %0,%y1")]) ; Definitions for store from 32-bit fpr register (define_mode_attr f32_sr [(SF "f") (SD "wx")]) (define_mode_attr f32_sm [(SF "m") (SD "Z")]) (define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")]) +(define_mode_attr f32_sv [(SF 
"stxsspx %1,%y0") (SD "stxsiwzx %1,%y0")]) + +; Definitions for 32-bit fpr direct move +(define_mode_attr f32_dm [(SF "wn") (SD "wm")]) ; These modes do not fit in integer registers in 32-bit mode. ; but on e500v2, the gpr are 64 bit registers @@ -373,7 +394,7 @@ (define_insn "*zero_extenddi2_internal1" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") (zero_extend:DI (match_operand:QHSI 1 "reg_or_mem_operand" "m,r")))] - "TARGET_POWERPC64" + "TARGET_POWERPC64 && (mode != SImode || !TARGET_LFIWZX)" "@ lz%U1%X1 %0,%1 rldicl %0,%1,0," @@ -439,6 +460,29 @@ (const_int 0)))] "") +(define_insn "*zero_extendsidi2_lfiwzx" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wm") + (zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))] + "TARGET_POWERPC64 && TARGET_LFIWZX" + "@ + lwz%U1%X1 %0,%1 + rldicl %0,%1,0,32 + mtvsrwz %x0,%1 + lfiwzx %0,%y1 + lxsiwzx %x0,%y1" + [(set_attr_alternative "type" + [(if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "load_ux") + (if_then_else + (match_test "update_address_mem (operands[1], VOIDmode)") + (const_string "load_u") + (const_string "load"))) + (const_string "*") + (const_string "mffgpr") + (const_string "fpload") + (const_string "fpload")])]) + (define_insn "extendqidi2" [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))] @@ -586,10 +630,33 @@ "TARGET_POWERPC64" "") -(define_insn "" +(define_insn "*extendsidi2_lfiwax" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wm") + (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))] + "TARGET_POWERPC64 && TARGET_LFIWAX" + "@ + lwa%U1%X1 %0,%1 + extsw %0,%1 + mtvsrwa %x0,%1 + lfiwax %0,%y1 + lxsiwax %x0,%y1" + [(set_attr_alternative "type" + [(if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "load_ext_ux") + (if_then_else + (match_test "update_address_mem (operands[1], VOIDmode)") 
+ (const_string "load_ext_u") + (const_string "load_ext"))) + (const_string "exts") + (const_string "mffgpr") + (const_string "fpload") + (const_string "fpload")])]) + +(define_insn "*extendsidi2_nocell" [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))] - "TARGET_POWERPC64 && rs6000_gen_cell_microcode" + "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX" "@ lwa%U1%X1 %0,%1 extsw %0,%1" @@ -603,7 +670,7 @@ (const_string "load_ext"))) (const_string "exts")])]) -(define_insn "" +(define_insn "*extendsidi2_nocell" [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r")))] "TARGET_POWERPC64 && !rs6000_gen_cell_microcode" @@ -2040,7 +2107,9 @@ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))] "TARGET_CMPB && TARGET_POPCNTB" - "prty %0,%1") + "prty %0,%1" + [(set_attr "length" "4") + (set_attr "type" "popcnt")]) (define_expand "parity2" [(set (match_operand:GPR 0 "gpc_reg_operand" "") @@ -4321,7 +4390,7 @@ # # #" - [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") (set_attr "length" "4,4,4,8,8,8")]) (define_split @@ -4353,7 +4422,7 @@ # # #" - [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") + [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare") (set_attr "length" "4,4,4,8,8,8")]) (define_split @@ -5558,12 +5627,15 @@ ; We don't define lfiwax/lfiwzx with the normal definition, because we ; don't want to support putting SImode in FPR registers. 
(define_insn "lfiwax" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") - (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")] + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm") + (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")] UNSPEC_LFIWAX))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX" - "lfiwax %0,%y1" - [(set_attr "type" "fpload")]) + "@ + lfiwax %0,%y1 + lxsiwax %x0,%y1 + mtvsrwa %x0,%1" + [(set_attr "type" "fpload,fpload,mffgpr")]) ; This split must be run before register allocation because it allocates the ; memory slot that is needed to move values to/from the FPR. We don't allocate @@ -5585,7 +5657,8 @@ rtx src = operands[1]; rtx tmp; - if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + if (!MEM_P (src) && TARGET_POWERPC64 + && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) tmp = convert_to_mode (DImode, src, false); else { @@ -5634,12 +5707,15 @@ (set_attr "type" "fpload")]) (define_insn "lfiwzx" - [(set (match_operand:DI 0 "gpc_reg_operand" "=d") - (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")] + [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm") + (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")] UNSPEC_LFIWZX))] "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX" - "lfiwzx %0,%y1" - [(set_attr "type" "fpload")]) + "@ + lfiwzx %0,%y1 + lxsiwzx %x0,%y1 + mtvsrwz %x0,%1" + [(set_attr "type" "fpload,fpload,mftgpr")]) (define_insn_and_split "floatunssi2_lfiwzx" [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d") @@ -5656,7 +5732,8 @@ rtx src = operands[1]; rtx tmp; - if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64) + if (!MEM_P (src) && TARGET_POWERPC64 + && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) tmp = convert_to_mode (DImode, src, true); else { @@ -5947,7 +6024,7 @@ emit_insn (gen_stfiwx (dest, tmp)); DONE; } - else if (TARGET_MFPGPR && TARGET_POWERPC64) + else if (TARGET_POWERPC64 && (TARGET_MFPGPR || 
TARGET_DIRECT_MOVE)) { dest = gen_lowpart (DImode, dest); emit_move_insn (dest, tmp); @@ -6041,7 +6118,7 @@ emit_insn (gen_stfiwx (dest, tmp)); DONE; } - else if (TARGET_MFPGPR && TARGET_POWERPC64) + else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE)) { dest = gen_lowpart (DImode, dest); emit_move_insn (dest, tmp); @@ -8507,7 +8584,7 @@ cmpi %2,%0,0 mr. %0,%1 #" - [(set_attr "type" "cmp,compare,cmp") + [(set_attr "type" "cmp,fast_compare,cmp") (set_attr "length" "4,4,8")]) (define_split @@ -8697,8 +8774,8 @@ }") (define_insn "mov_hardfloat" - [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,,,*c*l,!r,*h,!r,!r") - (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,,,r,h,0,G,Fn"))] + [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,,,wm,Z,?,?r,*c*l,!r,*h,!r,!r") + (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,,,Z,wm,r,,r,h,0,G,Fn"))] "(gpc_reg_operand (operands[0], mode) || gpc_reg_operand (operands[1], mode)) && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)" @@ -8711,6 +8788,10 @@ xxlxor %x0,%x0,%x0 + + + mtvsrwz %x0,%1 + mfvsrwz %0,%x1 mt%0 %1 mf%1 %0 nop @@ -8749,16 +8830,20 @@ (match_test "update_address_mem (operands[0], VOIDmode)") (const_string "fpstore_u") (const_string "fpstore"))) + (const_string "fpload") + (const_string "fpstore") + (const_string "mftgpr") + (const_string "mffgpr") (const_string "mtjmpr") (const_string "mfjmpr") (const_string "*") (const_string "*") (const_string "*")]) - (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8")]) + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8")]) (define_insn "*mov_softfloat" [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h") - (match_operand:FMOVE32 1 "input_operand" "r, r,h,m,r,I,L,G,Fn,0"))] + (match_operand:FMOVE32 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))] "(gpc_reg_operand (operands[0], mode) || gpc_reg_operand (operands[1], mode)) && (TARGET_SOFT_FLOAT || !TARGET_FPRS)" @@ -8971,8 
+9056,8 @@ ; ld/std require word-aligned displacements -> 'Y' constraint. ; List Y->r and r->Y before r->r for reload. (define_insn "*mov_hardfloat64" - [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg") - (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r"))] + [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm") + (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))] "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && (gpc_reg_operand (operands[0], mode) || gpc_reg_operand (operands[1], mode))" @@ -8997,7 +9082,9 @@ # # mftgpr %0,%1 - mffgpr %0,%1" + mffgpr %0,%1 + mfvsrd %0,%x1 + mtvsrd %x0,%1" [(set_attr_alternative "type" [(if_then_else (match_test "update_indexed_address_mem (operands[0], VOIDmode)") @@ -9055,8 +9142,10 @@ (const_string "*") (const_string "*") (const_string "mftgpr") + (const_string "mffgpr") + (const_string "mftgpr") (const_string "mffgpr")]) - (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")]) + (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")]) (define_insn "*mov_softfloat64" [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h") @@ -9436,6 +9525,216 @@ }) +;; Power8 merge instructions to allow direct move to/from floating point +;; registers in 32-bit mode. We use TF mode to get two registers to move the +;; individual 32-bit parts across. Subreg doesn't work too well on the TF +;; value, since it is allocated in reload and not all of the flow information +;; is setup for it. We have two patterns to do the two moves between gprs and +;; fprs. There isn't a dependancy between the two, but we could potentially +;; schedule other instructions between the two instructions. TFmode is +;; currently limited to traditional FPR registers. 
If/when this is changed, we +;; will need to revist %L to make sure it works with VSX registers, or add an +;; %x version of %L. + +(define_insn "p8_fmrgow_" + [(set (match_operand:FMOVE64X 0 "register_operand" "=d") + (unspec:FMOVE64X [(match_operand:TF 1 "register_operand" "d")] + UNSPEC_P8V_FMRGOW))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "fmrgow %0,%1,%L1" + [(set_attr "type" "vecperm")]) + +(define_insn "p8_mtvsrwz_1" + [(set (match_operand:TF 0 "register_operand" "=d") + (unspec:TF [(match_operand:SI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRWZ))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrwz %x0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_mtvsrwz_2" + [(set (match_operand:TF 0 "register_operand" "+d") + (unspec:TF [(match_dup 0) + (match_operand:SI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRWZ))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrwz %L0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn_and_split "reload_fpr_from_gpr" + [(set (match_operand:FMOVE64X 0 "register_operand" "=ws") + (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:TF 2 "register_operand" "=d"))] + "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (SImode, src); + rtx gpr_lo_reg = gen_lowpart (SImode, src); + + emit_insn (gen_p8_mtvsrwz_1 (tmp, gpr_hi_reg)); + emit_insn (gen_p8_mtvsrwz_2 (tmp, gpr_lo_reg)); + emit_insn (gen_p8_fmrgow_ (dest, tmp)); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move 128 bit values from GPRs to VSX registers in 64-bit mode +(define_insn "p8_mtvsrd_1" + [(set (match_operand:TF 0 "register_operand" "=ws") + (unspec:TF [(match_operand:DI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRD))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrd %0,%1" + [(set_attr "type" 
"mftgpr")]) + +(define_insn "p8_mtvsrd_2" + [(set (match_operand:TF 0 "register_operand" "+ws") + (unspec:TF [(match_dup 0) + (match_operand:DI 1 "register_operand" "r")] + UNSPEC_P8V_MTVSRD))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrd %L0,%1" + [(set_attr "type" "mftgpr")]) + +(define_insn "p8_xxpermdi_" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa") + (unspec:FMOVE128_GPR [(match_operand:TF 1 "register_operand" "ws")] + UNSPEC_P8V_XXPERMDI))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "xxpermdi %x0,%1,%L1,0" + [(set_attr "type" "vecperm")]) + +(define_insn_and_split "reload_vsx_from_gpr" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa") + (unspec:FMOVE128_GPR + [(match_operand:FMOVE128_GPR 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:TF 2 "register_operand" "=ws"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (DImode, src); + rtx gpr_lo_reg = gen_lowpart (DImode, src); + + emit_insn (gen_p8_mtvsrd_1 (tmp, gpr_hi_reg)); + emit_insn (gen_p8_mtvsrd_2 (tmp, gpr_lo_reg)); + emit_insn (gen_p8_xxpermdi_ (dest, tmp)); +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move SFmode to a VSX from a GPR register. Because scalar floating point +;; type is stored internally as double precision in the VSX registers, we have +;; to convert it from the vector format. 
+ +(define_insn_and_split "reload_vsx_from_gprsf" + [(set (match_operand:SF 0 "register_operand" "=wa") + (unspec:SF [(match_operand:SF 1 "register_operand" "r")] + UNSPEC_P8V_RELOAD_FROM_GPR)) + (clobber (match_operand:DI 2 "register_operand" "=r"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op0_di = simplify_gen_subreg (DImode, op0, SFmode, 0); + rtx op1_di = simplify_gen_subreg (DImode, op1, SFmode, 0); + + /* Move SF value to upper 32-bits for xscvspdpn. */ + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32))); + emit_move_insn (op0_di, op2); + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0)); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "two")]) + +;; Move 128 bit values from VSX registers to GPRs in 64-bit mode by doing a +;; normal 64-bit move, followed by an xxpermdi to get the bottom 64-bit value, +;; and then doing a move of that. 
+(define_insn "p8_mfvsrd_3_" + [(set (match_operand:DF 0 "register_operand" "=r") + (unspec:DF [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "mfvsrd %0,%x1" + [(set_attr "type" "mftgpr")]) + +(define_insn_and_split "reload_gpr_from_vsx" + [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=r") + (unspec:FMOVE128_GPR + [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX)) + (clobber (match_operand:FMOVE128_GPR 2 "register_operand" "=wa"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx gpr_hi_reg = gen_highpart (DFmode, dest); + rtx gpr_lo_reg = gen_lowpart (DFmode, dest); + + emit_insn (gen_p8_mfvsrd_3_ (gpr_hi_reg, src)); + emit_insn (gen_vsx_xxpermdi_ (tmp, src, src, GEN_INT (3))); + emit_insn (gen_p8_mfvsrd_3_ (gpr_lo_reg, tmp)); +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +;; Move SFmode to a GPR from a VSX register. Because scalar floating point +;; type is stored internally as double precision, we have to convert it to the +;; vector format. 
+ +(define_insn_and_split "reload_gpr_from_vsxsf" + [(set (match_operand:SF 0 "register_operand" "=r") + (unspec:SF [(match_operand:SF 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX)) + (clobber (match_operand:V4SF 2 "register_operand" "=wa"))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0); + + emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1)); + emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2)); + emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32))); + DONE; +} + [(set_attr "length" "12") + (set_attr "type" "three")]) + +(define_insn "p8_mfvsrd_4_disf" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")] + UNSPEC_P8V_RELOAD_FROM_VSX))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN" + "mfvsrd %0,%x1" + [(set_attr "type" "mftgpr")]) + + ;; Next come the multi-word integer load and store and the load and store ;; multiple insns. @@ -9484,7 +9783,8 @@ [(set (match_operand:DI 0 "gpc_reg_operand" "") (match_operand:DI 1 "const_int_operand" ""))] "! 
TARGET_POWERPC64 && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1])" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 1))] " @@ -9502,13 +9802,14 @@ [(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "") (match_operand:DIFD 1 "input_operand" ""))] "reload_completed && !TARGET_POWERPC64 - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) (define_insn "*movdi_internal64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg") - (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm") + (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r,*wm,r"))] "TARGET_POWERPC64 && (gpc_reg_operand (operands[0], DImode) || gpc_reg_operand (operands[1], DImode))" @@ -9530,7 +9831,9 @@ nop xxlxor %x0,%x0,%x0 mftgpr %0,%1 - mffgpr %0,%1" + mffgpr %0,%1 + mfvsrd %0,%x1 + mtvsrd %x0,%1" [(set_attr_alternative "type" [(if_then_else (match_test "update_indexed_address_mem (operands[0], VOIDmode)") @@ -9579,8 +9882,10 @@ (const_string "*") (const_string "vecsimple") (const_string "mftgpr") + (const_string "mffgpr") + (const_string "mftgpr") (const_string "mffgpr")]) - (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4")]) + (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4,4,4")]) ;; Generate all one-bits and clear left or right. ;; Use (and:DI (rotate:DI ...)) to avoid anddi3 unnecessary clobber. 
@@ -9669,19 +9974,20 @@ (const_string "conditional")))]) (define_insn "*mov_ppc64" - [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r") - (match_operand:TI2 1 "input_operand" "r,Y,r"))] - "(TARGET_POWERPC64 - && (mode != TImode || VECTOR_MEM_NONE_P (TImode)) + [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r") + (match_operand:TI2 1 "input_operand" "r,Y,r,F"))] + "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (mode) && (gpc_reg_operand (operands[0], mode) || gpc_reg_operand (operands[1], mode)))" "#" - [(set_attr "type" "store,load,*")]) + [(set_attr "type" "store,load,*,*")]) (define_split - [(set (match_operand:TI2 0 "gpc_reg_operand" "") + [(set (match_operand:TI2 0 "int_reg_operand" "") (match_operand:TI2 1 "const_double_operand" ""))] - "TARGET_POWERPC64" + "TARGET_POWERPC64 + && (VECTOR_MEM_NONE_P (mode) + || (reload_completed && INT_REGNO_P (REGNO (operands[0]))))" [(set (match_dup 2) (match_dup 4)) (set (match_dup 3) (match_dup 5))] " @@ -9708,7 +10014,9 @@ [(set (match_operand:TI2 0 "nonimmediate_operand" "") (match_operand:TI2 1 "input_operand" ""))] "reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1]) + && !quad_load_store_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 9329ff5..6cfebde 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -126,7 +126,9 @@ (match_operand:VEC_L 1 "input_operand" ""))] "VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" + && gpr_or_gpr_p (operands[0], operands[1]) + && !direct_move_p (operands[0], operands[1]) + && !quad_load_store_p (operands[0], operands[1])" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 
daf9a24..b87da82 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -213,112 +213,31 @@ ;; VSX moves (define_insn "*vsx_mov" - [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,,,?Z,?wa,?wa,*Y,*r,*r,,?wa,*r,v,wZ,v") - (match_operand:VSX_M 1 "input_operand" ",Z,,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))] + [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,,,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,,?wa,*r,v,wZ, v") + (match_operand:VSX_M 1 "input_operand" ",Z,,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))] "VECTOR_MEM_VSX_P (mode) && (register_operand (operands[0], mode) || register_operand (operands[1], mode))" { - switch (which_alternative) - { - case 0: - case 3: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "stxx %x1,%y0"; - - case 1: - case 4: - gcc_assert (MEM_P (operands[1]) - && GET_CODE (XEXP (operands[1], 0)) != PRE_INC - && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY); - return "lxx %x0,%y1"; - - case 2: - case 5: - return "xxlor %x0,%x1,%x1"; - - case 6: - case 7: - case 8: - case 11: - return "#"; - - case 9: - case 10: - return "xxlxor %x0,%x0,%x0"; - - case 12: - return output_vec_const_move (operands); - - case 13: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "stvx %1,%y0"; - - case 14: - gcc_assert (MEM_P (operands[0]) - && GET_CODE (XEXP (operands[0], 0)) != PRE_INC - && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC - && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY); - return "lvx %0,%y1"; - - default: - gcc_unreachable (); - } + return rs6000_output_move_128bit (operands); } - [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")]) + 
[(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload") + (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")]) ;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal ;; use of TImode is for unions. However for plain data movement, slightly ;; favor the vector loads (define_insn "*vsx_movti_64bit" - [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,?Y,?r,?r,?r") - (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v, r, Y, r, n"))] + [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r") + (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))] "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode) && (register_operand (operands[0], TImode) || register_operand (operands[1], TImode))" { - switch (which_alternative) - { - case 0: - return "stxvd2x %x1,%y0"; - - case 1: - return "lxvd2x %x0,%y1"; - - case 2: - return "xxlor %x0,%x1,%x1"; - - case 3: - return "xxlxor %x0,%x0,%x0"; - - case 4: - return output_vec_const_move (operands); - - case 5: - return "stvx %1,%y0"; - - case 6: - return "lvx %0,%y1"; - - case 7: - case 8: - case 9: - case 10: - return "#"; - - default: - gcc_unreachable (); - } + return rs6000_output_move_128bit (operands); } - [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,*,*,*,*") - (set_attr "length" " 4, 4, 4, 4, 8, 4, 4,8,8,8,8")]) + [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*") + (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")]) (define_insn "*vsx_movti_32bit" [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b718d69..8817b27 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,18 @@ +2013-06-10 Michael Meissner + Pat Haugen + 
Peter Bergner + + * gcc.target/powerpc/direct-move-vint1.c: New tests for power8 + direct move instructions. + * gcc.target/powerpc/direct-move-vint2.c: Likewise. + * gcc.target/powerpc/direct-move.h: Likewise. + * gcc.target/powerpc/direct-move-float1.c: Likewise. + * gcc.target/powerpc/direct-move-float2.c: Likewise. + * gcc.target/powerpc/direct-move-double1.c: Likewise. + * gcc.target/powerpc/direct-move-double2.c: Likewise. + * gcc.target/powerpc/direct-move-long1.c: Likewise. + * gcc.target/powerpc/direct-move-long2.c: Likewise. + 2013-06-10 Paolo Carlini PR c++/52440 diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-double1.c b/gcc/testsuite/gcc.target/powerpc/direct-move-double1.c new file mode 100644 index 0000000..534a04a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/direct-move-double1.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mcpu=power8 -O2" } */ +/* { dg-final { scan-assembler-times "mtvsrd" 1 } } */ +/* { dg-final { scan-assembler-times "mfvsrd" 1 } } */ + +/* Check code generation for direct move for long types. 
*/ + +#define TYPE double +#define IS_FLOAT 1 +#define NO_ALTIVEC 1 + +#include "direct-move.h" diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-double2.c b/gcc/testsuite/gcc.target/powerpc/direct-move-double2.c new file mode 100644 index 0000000..750debf --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/direct-move-double2.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-mcpu=power8 -O2" } */ + +/* Check whether we get the right bits for direct move at runtime. */ + +#define TYPE double +#define IS_FLOAT 1 +#define NO_ALTIVEC 1 +#define DO_MAIN + +#include "direct-move.h" diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c b/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c new file mode 100644 index 0000000..ff1e97c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/direct-move-float1.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mcpu=power8 -O2" } */ +/* { dg-final { scan-assembler-times "mtvsrd" 2 } } */ +/* { dg-final { scan-assembler-times "mfvsrd" 2 } } */ +/* { dg-final { scan-assembler-times "xscvdpspn" 2 } } */ +/* { dg-final { scan-assembler-times "xscvspdpn" 2 } } */ + +/* Check code generation for direct move for long types. 
*/ + +#define TYPE float +#define IS_FLOAT 1 +#define NO_ALTIVEC 1 + +#include "direct-move.h" diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-float2.c b/gcc/testsuite/gcc.target/powerpc/direct-move-float2.c new file mode 100644 index 0000000..ace728f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/direct-move-float2.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-mcpu=power8 -O2" } */ + +/* Check whether we get the right bits for direct move at runtime. */ + +#define TYPE float +#define IS_FLOAT 1 +#define NO_ALTIVEC 1 +#define DO_MAIN + +#include "direct-move.h" diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-long1.c b/gcc/testsuite/gcc.target/powerpc/direct-move-long1.c new file mode 100644 index 0000000..907e802 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/direct-move-long1.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mcpu=power8 -O2" } */ +/* { dg-final { scan-assembler-times "mtvsrd" 1 } } */ +/* { dg-final { scan-assembler-times "mfvsrd" 2 } } */ + +/* Check code generation for direct move for long types. 
*/ + +#define TYPE long +#define IS_INT 1 +#define NO_ALTIVEC 1 + +#include "direct-move.h" diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-long2.c b/gcc/testsuite/gcc.target/powerpc/direct-move-long2.c new file mode 100644 index 0000000..fba613e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/direct-move-long2.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-mcpu=power8 -O2" } */ + +/* Check whether we get the right bits for direct move at runtime. */ + +#define TYPE long +#define IS_INT 1 +#define NO_ALTIVEC 1 +#define DO_MAIN + +#include "direct-move.h" diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c b/gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c new file mode 100644 index 0000000..cdfa188 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/direct-move-vint1.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mcpu=power8 -O2" } */ +/* { dg-final { scan-assembler-times "mtvsrd" 4 } } */ +/* { dg-final { scan-assembler-times "mfvsrd" 4 } } */ + +/* Check code generation for direct move for long types. 
*/ + +#define TYPE vector int + +#include "direct-move.h" diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c b/gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c new file mode 100644 index 0000000..5c0c9ab --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/direct-move-vint2.c @@ -0,0 +1,12 @@ +/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-mcpu=power8 -O2" } */ + +/* Check whether we get the right bits for direct move at runtime. */ + +#define TYPE vector int +#define DO_MAIN + +#include "direct-move.h" diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move.h b/gcc/testsuite/gcc.target/powerpc/direct-move.h new file mode 100644 index 0000000..4e84fd6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/direct-move.h @@ -0,0 +1,183 @@ +/* Test functions for direct move support. 
*/ + + +void __attribute__((__noinline__)) +copy (TYPE *a, TYPE *b) +{ + *b = *a; +} + +#ifndef NO_GPR +void __attribute__((__noinline__)) +load_gpr (TYPE *a, TYPE *b) +{ + TYPE c = *a; + __asm__ ("# gpr, reg = %0" : "+b" (c)); + *b = c; +} +#endif + +#ifndef NO_FPR +void __attribute__((__noinline__)) +load_fpr (TYPE *a, TYPE *b) +{ + TYPE c = *a; + __asm__ ("# fpr, reg = %0" : "+d" (c)); + *b = c; +} +#endif + +#ifndef NO_ALTIVEC +void __attribute__((__noinline__)) +load_altivec (TYPE *a, TYPE *b) +{ + TYPE c = *a; + __asm__ ("# altivec, reg = %0" : "+v" (c)); + *b = c; +} +#endif + +#ifndef NO_VSX +void __attribute__((__noinline__)) +load_vsx (TYPE *a, TYPE *b) +{ + TYPE c = *a; + __asm__ ("# vsx, reg = %x0" : "+wa" (c)); + *b = c; +} +#endif + +#ifndef NO_GPR_TO_VSX +void __attribute__((__noinline__)) +load_gpr_to_vsx (TYPE *a, TYPE *b) +{ + TYPE c = *a; + TYPE d; + __asm__ ("# gpr, reg = %0" : "+b" (c)); + d = c; + __asm__ ("# vsx, reg = %x0" : "+wa" (d)); + *b = d; +} +#endif + +#ifndef NO_VSX_TO_GPR +void __attribute__((__noinline__)) +load_vsx_to_gpr (TYPE *a, TYPE *b) +{ + TYPE c = *a; + TYPE d; + __asm__ ("# vsx, reg = %x0" : "+wa" (c)); + d = c; + __asm__ ("# gpr, reg = %0" : "+b" (d)); + *b = d; +} +#endif + +#ifdef DO_MAIN +typedef void (fn_type (TYPE *, TYPE *)); + +struct test_struct { + fn_type *func; + const char *name; +}; + +const struct test_struct test_functions[] = { + { copy, "copy" }, +#ifndef NO_GPR + { load_gpr, "load_gpr" }, +#endif +#ifndef NO_FPR + { load_fpr, "load_fpr" }, +#endif +#ifndef NO_ALTIVEC + { load_altivec, "load_altivec" }, +#endif +#ifndef NO_VSX + { load_vsx, "load_vsx" }, +#endif +#ifndef NO_GPR_TO_VSX + { load_gpr_to_vsx, "load_gpr_to_vsx" }, +#endif +#ifndef NO_VSX_TO_GPR + { load_vsx_to_gpr, "load_vsx_to_gpr" }, +#endif +}; + +/* Test a given value for each of the functions. 
*/
/* Run every enabled test function on the value A and abort if any of
   them fails to reproduce the original bit pattern.  memcmp is used so
   the check also works for NaNs and for vector types.  */
void __attribute__((__noinline__))
test_value (TYPE a)
{
  size_t i;

  for (i = 0; i < sizeof (test_functions) / sizeof (test_functions[0]); i++)
    {
      TYPE b;

      test_functions[i].func (&a, &b);
      if (memcmp ((void *)&a, (void *)&b, sizeof (TYPE)) != 0)
	abort ();
    }
}

/* Main program.  */
int
main (void)
{
  size_t i;
  long j;			/* only used by the random-bytes driver.  */
  union {
    TYPE value;
    unsigned char bytes[sizeof (TYPE)];
  } u;				/* only used by the random-bytes driver.  */

#if IS_INT
  /* Signed integers: a few small values around zero, then each single
     bit.  (NOTE(review): the shift reaches the sign bit on the last
     iteration, which is formally undefined for signed TYPE — presumably
     intentional to exercise that bit pattern; confirm.)  */
  TYPE value = (TYPE)-5;
  for (i = 0; i < 12; i++)
    {
      test_value (value);
      value++;
    }

  for (i = 0; i < 8*sizeof (TYPE); i++)
    test_value (((TYPE)1) << i);

#elif IS_UNS
  /* Unsigned integers: small values and their complements, then each
     single bit.  */
  TYPE value = (TYPE)0;
  for (i = 0; i < 10; i++)
    {
      test_value (value);
      test_value (~ value);
      value++;
    }

  for (i = 0; i < 8*sizeof (TYPE); i++)
    test_value (((TYPE)1) << i);

#elif IS_FLOAT
  /* Floating point: small integral values, then some non-trivial
     constants including negative zero, NaN and both infinities.  */
  TYPE value = (TYPE)-5;
  for (i = 0; i < 12; i++)
    {
      test_value (value);
      value++;
    }

  test_value ((TYPE)3.1415926535);
  test_value ((TYPE)1.23456);
  test_value ((TYPE)(-0.0));
  test_value ((TYPE)NAN);
  test_value ((TYPE)+INFINITY);
  test_value ((TYPE)-INFINITY);
#else

  /* Anything else (e.g. vector types): ten random bit patterns built a
     byte at a time through the union.  */
  for (j = 0; j < 10; j++)
    {
      for (i = 0; i < sizeof (TYPE); i++)
	u.bytes[i] = (unsigned char) (random () >> 4);

      test_value (u.value);
    }
#endif

  return 0;
}
#endif