X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=gcc%2Fconfig%2Fs390%2Fs390.c;h=b1ab0c07613ac3d6d3f679687d67b2b53f769159;hb=7e2507a578d6c82e6bb1fb05353c61f7808cf47e;hp=b994cd2630c187a3999f57e7d2aecf44e9886a12;hpb=f2c0c2431897ba9a03b97e20d9b8a2fa4d0c95f6;p=platform%2Fupstream%2Fgcc.git diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index b994cd2..b1ab0c0 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -1,5 +1,5 @@ /* Subroutines used for code generation on IBM S/390 and zSeries - Copyright (C) 1999-2015 Free Software Foundation, Inc. + Copyright (C) 1999-2016 Free Software Foundation, Inc. Contributed by Hartmut Penner (hpenner@de.ibm.com) and Ulrich Weigand (uweigand@de.ibm.com) and Andreas Krebbel (Andreas.Krebbel@de.ibm.com). @@ -24,62 +24,66 @@ along with GCC; see the file COPYING3. If not see #include "system.h" #include "coretypes.h" #include "backend.h" -#include "cfghooks.h" +#include "target.h" +#include "target-globals.h" +#include "rtl.h" #include "tree.h" #include "gimple.h" -#include "rtl.h" +#include "cfghooks.h" +#include "cfgloop.h" #include "df.h" +#include "tm_p.h" +#include "stringpool.h" +#include "expmed.h" +#include "optabs.h" +#include "regs.h" +#include "emit-rtl.h" +#include "recog.h" +#include "cgraph.h" +#include "diagnostic-core.h" +#include "diagnostic.h" #include "alias.h" #include "fold-const.h" #include "print-tree.h" -#include "stringpool.h" #include "stor-layout.h" #include "varasm.h" #include "calls.h" -#include "tm_p.h" -#include "regs.h" -#include "insn-config.h" #include "conditions.h" #include "output.h" #include "insn-attr.h" #include "flags.h" #include "except.h" -#include "recog.h" -#include "expmed.h" #include "dojump.h" #include "explow.h" -#include "emit-rtl.h" #include "stmt.h" #include "expr.h" #include "reload.h" -#include "diagnostic-core.h" #include "cfgrtl.h" #include "cfganal.h" #include "lcm.h" #include "cfgbuild.h" #include "cfgcleanup.h" -#include "target.h" #include "debug.h" #include "langhooks.h" -#include "insn-codes.h" -#include "optabs.h" #include "internal-fn.h" #include "gimple-fold.h" #include "tree-eh.h" #include "gimplify.h" #include "params.h" -#include "cfgloop.h" #include "opts.h" #include "tree-pass.h" #include "context.h" #include "builtins.h" #include "rtl-iter.h" #include "intl.h" -#include "cgraph.h" +#include "tm-constrs.h" /* This file should be included last. */ #include "target-def.h" +/* Remember the last target of s390_set_current_function. */ +static GTY(()) tree s390_previous_fndecl; + /* Define the specific costs for a given cpu. 
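   The cost tables below give relative instruction latencies via
   COSTS_N_INSNS; they are reached through s390_cost and consulted by
   s390_rtx_costs when pricing multiply, divide, and square-root
   patterns for the processor selected with -mtune.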
*/ struct processor_costs @@ -117,7 +121,7 @@ struct processor_costs const int dsgr; }; -const struct processor_costs *s390_cost; +#define s390_cost ((const struct processor_costs *)(s390_cost_pointer)) static const struct processor_costs z900_cost = @@ -311,6 +315,27 @@ struct processor_costs zEC12_cost = COSTS_N_INSNS (160), /* DSGR cracked */ }; +static struct +{ + const char *const name; + const enum processor_type processor; + const struct processor_costs *cost; +} +const processor_table[] = +{ + { "g5", PROCESSOR_9672_G5, &z900_cost }, + { "g6", PROCESSOR_9672_G6, &z900_cost }, + { "z900", PROCESSOR_2064_Z900, &z900_cost }, + { "z990", PROCESSOR_2084_Z990, &z990_cost }, + { "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost }, + { "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost }, + { "z10", PROCESSOR_2097_Z10, &z10_cost }, + { "z196", PROCESSOR_2817_Z196, &z196_cost }, + { "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost }, + { "z13", PROCESSOR_2964_Z13, &zEC12_cost }, + { "native", PROCESSOR_NATIVE, NULL } +}; + extern int reload_completed; /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */ @@ -355,6 +380,8 @@ struct GTY (()) s390_frame_layout be saved to. 0 - does not need to be saved at all -1 - stack slot */ +#define SAVE_SLOT_NONE 0 +#define SAVE_SLOT_STACK -1 signed char gpr_save_slots[16]; /* Number of first and last gpr to be saved, restored. */ @@ -401,6 +428,13 @@ struct GTY(()) machine_function /* True if the current function may contain a tbegin clobbering FPRs. */ bool tbegin_p; + + /* For -fsplit-stack support: A stack local which holds a pointer to + the stack arguments for a function with a variable number of + arguments. This is set at the start of the function and is used + to initialize the overflow_arg_area field of the va_list + structure. */ + rtx split_stack_varargs_pointer; }; /* Few accessor macros for struct cfun->machine->s390_frame_layout. */ @@ -614,10 +648,6 @@ s390_init_builtins (void) NULL, NULL); tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL); tree c_uint64_type_node; - unsigned int bflags_mask = (BFLAGS_MASK_INIT); - - bflags_mask |= (TARGET_VX) ? B_VX : 0; - bflags_mask |= (TARGET_HTM) ? B_HTM : 0; /* The uint64_type_node from tree.c is not compatible to the C99 uint64_t data type. What we want is c_uint64_type_node from @@ -630,46 +660,46 @@ s390_init_builtins (void) #undef DEF_TYPE #define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P) \ - if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ + if (s390_builtin_types[INDEX] == NULL) \ s390_builtin_types[INDEX] = (!CONST_P) ? 
\ (NODE) : build_type_variant ((NODE), 1, 0); #undef DEF_POINTER_TYPE #define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE) \ - if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ + if (s390_builtin_types[INDEX] == NULL) \ s390_builtin_types[INDEX] = \ build_pointer_type (s390_builtin_types[INDEX_BASE]); #undef DEF_DISTINCT_TYPE #define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE) \ - if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ + if (s390_builtin_types[INDEX] == NULL) \ s390_builtin_types[INDEX] = \ build_distinct_type_copy (s390_builtin_types[INDEX_BASE]); #undef DEF_VECTOR_TYPE #define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \ - if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ + if (s390_builtin_types[INDEX] == NULL) \ s390_builtin_types[INDEX] = \ build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS); #undef DEF_OPAQUE_VECTOR_TYPE #define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \ - if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ + if (s390_builtin_types[INDEX] == NULL) \ s390_builtin_types[INDEX] = \ build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS); #undef DEF_FN_TYPE #define DEF_FN_TYPE(INDEX, BFLAGS, args...) \ - if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \ + if (s390_builtin_fn_types[INDEX] == NULL) \ s390_builtin_fn_types[INDEX] = \ - build_function_type_list (args, NULL_TREE); + build_function_type_list (args, NULL_TREE); #undef DEF_OV_TYPE #define DEF_OV_TYPE(...) #include "s390-builtin-types.def" #undef B_DEF #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \ - if (((BFLAGS) & ~bflags_mask) == 0) \ + if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \ s390_builtin_decls[S390_BUILTIN_##NAME] = \ add_builtin_function ("__builtin_" #NAME, \ s390_builtin_fn_types[FNTYPE], \ @@ -679,7 +709,8 @@ s390_init_builtins (void) ATTRS); #undef OB_DEF #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \ - if (((BFLAGS) & ~bflags_mask) == 0) \ + if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \ + == NULL) \ s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \ add_builtin_function ("__builtin_" #NAME, \ s390_builtin_fn_types[FNTYPE], \ @@ -763,10 +794,29 @@ s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, if (TARGET_DEBUG_ARG) { fprintf (stderr, - "s390_expand_builtin, code = %4d, %s\n", - (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl))); + "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n", + (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)), + bflags_for_builtin (fcode)); } + if (S390_USE_TARGET_ATTRIBUTE) + { + unsigned int bflags; + + bflags = bflags_for_builtin (fcode); + if ((bflags & B_HTM) && !TARGET_HTM) + { + error ("Builtin %qF is not supported without -mhtm " + "(default with -march=zEC12 and higher).", fndecl); + return const0_rtx; + } + if ((bflags & B_VX) && !TARGET_VX) + { + error ("Builtin %qF is not supported without -mvx " + "(default with -march=z13 and higher).", fndecl); + return const0_rtx; + } + } if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET && fcode < S390_ALL_BUILTIN_MAX) { @@ -836,6 +886,15 @@ s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, insn_op = &insn_data[icode].operand[arity + nonvoid]; op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); + /* expand_expr truncates constants to the target mode only if it + is "convenient". However, our checks below rely on this + being done. 
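+	     E.g. (const_int 255) destined for a QImode operand compares
+	     equal to (const_int -1) only after this truncation has been
+	     applied.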
*/ + if (CONST_INT_P (op[arity]) + && SCALAR_INT_MODE_P (insn_op->mode) + && GET_MODE (op[arity]) != insn_op->mode) + op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]), + insn_op->mode)); + /* Wrap the expanded RTX for pointer types into a MEM expr with the proper mode. This allows us to use e.g. (match_operand "memory_operand"..) in the insn patterns instead of (mem @@ -899,14 +958,6 @@ s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, arity++; } - if (last_vec_mode != VOIDmode && !TARGET_VX) - { - error ("Vector type builtin %qF is not supported without -mvx " - "(default with -march=z13).", - fndecl); - return const0_rtx; - } - switch (arity) { case 0: @@ -1281,7 +1332,7 @@ s390_tm_ccmode (rtx op1, rtx op2, bool mixed) { int bit0, bit1; - /* ??? Fixme: should work on CONST_DOUBLE as well. */ + /* ??? Fixme: should work on CONST_WIDE_INT as well. */ if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT) return VOIDmode; @@ -3313,6 +3364,7 @@ s390_rtx_costs (rtx x, machine_mode mode, int outer_code, case LABEL_REF: case SYMBOL_REF: case CONST_DOUBLE: + case CONST_WIDE_INT: case MEM: *total = 0; return true; @@ -3620,7 +3672,7 @@ tls_symbolic_reference_mentioned_p (rtx op) /* Return true if OP is a legitimate general operand when generating PIC code. It is given that flag_pic is on - and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */ + and that OP satisfies CONSTANT_P. */ int legitimate_pic_operand_p (rtx op) @@ -3635,7 +3687,7 @@ legitimate_pic_operand_p (rtx op) } /* Returns true if the constant value OP is a legitimate general operand. - It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */ + It is given that OP satisfies CONSTANT_P. */ static bool s390_legitimate_constant_p (machine_mode mode, rtx op) @@ -3645,9 +3697,11 @@ s390_legitimate_constant_p (machine_mode mode, rtx op) if (GET_MODE_SIZE (mode) != 16) return 0; - if (!const0_operand (op, mode) - && !s390_contiguous_bitmask_vector_p (op, NULL, NULL) - && !s390_bytemask_vector_p (op, NULL)) + if (!satisfies_constraint_j00 (op) + && !satisfies_constraint_jm1 (op) + && !satisfies_constraint_jKK (op) + && !satisfies_constraint_jxx (op) + && !satisfies_constraint_jyy (op)) return 0; } @@ -3687,6 +3741,7 @@ s390_cannot_force_const_mem (machine_mode mode, rtx x) { case CONST_INT: case CONST_DOUBLE: + case CONST_WIDE_INT: case CONST_VECTOR: /* Accept all non-symbolic constants. */ return false; @@ -3787,8 +3842,9 @@ legitimate_reload_constant_p (rtx op) return true; /* Accept double-word operands that can be split. */ - if (GET_CODE (op) == CONST_INT - && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)) + if (GET_CODE (op) == CONST_WIDE_INT + || (GET_CODE (op) == CONST_INT + && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))) { machine_mode dword_mode = word_mode == SImode ? DImode : TImode; rtx hi = operand_subword (op, 0, 0, dword_mode); @@ -3828,14 +3884,12 @@ legitimate_reload_fp_constant_p (rtx op) static bool legitimate_reload_vector_constant_p (rtx op) { - /* FIXME: Support constant vectors with all the same 16 bit unsigned - operands. These can be loaded with vrepi. 
*/ - if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16 - && (const0_operand (op, GET_MODE (op)) - || constm1_operand (op, GET_MODE (op)) - || s390_contiguous_bitmask_vector_p (op, NULL, NULL) - || s390_bytemask_vector_p (op, NULL))) + && (satisfies_constraint_j00 (op) + || satisfies_constraint_jm1 (op) + || satisfies_constraint_jKK (op) + || satisfies_constraint_jxx (op) + || satisfies_constraint_jyy (op))) return true; return false; @@ -3854,6 +3908,7 @@ s390_preferred_reload_class (rtx op, reg_class_t rclass) case CONST_VECTOR: case CONST_DOUBLE: case CONST_INT: + case CONST_WIDE_INT: if (reg_class_subset_p (GENERAL_REGS, rclass) && legitimate_reload_constant_p (op)) return GENERAL_REGS; @@ -3924,15 +3979,30 @@ s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment) HOST_WIDE_INT addend; rtx symref; + /* The "required alignment" might be 0 (e.g. for certain structs + accessed via BLKmode). Early abort in this case, as well as when + an alignment > 8 is required. */ + if (alignment < 2 || alignment > 8) + return false; + if (!s390_loadrelative_operand_p (addr, &symref, &addend)) return false; if (addend & (alignment - 1)) return false; - if (GET_CODE (symref) == SYMBOL_REF - && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref)) - return true; + if (GET_CODE (symref) == SYMBOL_REF) + { + /* We have load-relative instructions for 2-byte, 4-byte, and + 8-byte alignment so allow only these. */ + switch (alignment) + { + case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref); + case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref); + case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref); + default: return false; + } + } if (GET_CODE (symref) == UNSPEC && alignment <= UNITS_PER_LONG) @@ -3990,6 +4060,7 @@ s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem) /* Reload might have pulled a constant out of the literal pool. Force it back in. */ if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE + || GET_CODE (mem) == CONST_WIDE_INT || GET_CODE (mem) == CONST_VECTOR || GET_CODE (mem) == CONST) mem = force_const_mem (GET_MODE (reg), mem); @@ -4064,7 +4135,7 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, if (in_p && s390_loadrelative_operand_p (x, &symref, &offset) && mode == Pmode - && !SYMBOL_REF_ALIGN1_P (symref) + && !SYMBOL_FLAG_NOTALIGN2_P (symref) && (offset & 1) == 1) sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10 : CODE_FOR_reloadsi_larl_odd_addend_z10); @@ -5180,7 +5251,12 @@ s390_expand_setmem (rtx dst, rtx len, rtx val) else if (TARGET_MVCLE) { val = force_not_mem (convert_modes (Pmode, QImode, val, 1)); - emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val)); + if (TARGET_64BIT) + emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1), + val)); + else + emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1), + val)); } else @@ -6072,29 +6148,70 @@ s390_expand_vcond (rtx target, rtx then, rtx els, machine_mode result_mode; rtx result_target; + machine_mode target_mode = GET_MODE (target); + machine_mode cmp_mode = GET_MODE (cmp_op1); + rtx op = (cond == LT) ? els : then; + + /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31 + and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise + for short and byte (x >> 15 and x >> 7 respectively). 
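+     For GE the roles of THEN and ELS are simply swapped; in all cases
+     the shift count is the element size in bits minus one.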
*/ + if ((cond == LT || cond == GE) + && target_mode == cmp_mode + && cmp_op2 == CONST0_RTX (cmp_mode) + && op == CONST0_RTX (target_mode) + && s390_vector_mode_supported_p (target_mode) + && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT) + { + rtx negop = (cond == LT) ? then : els; + + int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1; + + /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */ + if (negop == CONST1_RTX (target_mode)) + { + rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1, + GEN_INT (shift), target, + 1, OPTAB_DIRECT); + if (res != target) + emit_move_insn (target, res); + return; + } + + /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */ + else if (all_ones_operand (negop, target_mode)) + { + rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1, + GEN_INT (shift), target, + 0, OPTAB_DIRECT); + if (res != target) + emit_move_insn (target, res); + return; + } + } + /* We always use an integral type vector to hold the comparison result. */ - result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1); + result_mode = cmp_mode == V2DFmode ? V2DImode : cmp_mode; result_target = gen_reg_rtx (result_mode); - /* Alternatively this could be done by reload by lowering the cmp* - predicates. But it appears to be better for scheduling etc. to - have that in early. */ + /* We allow vector immediates as comparison operands that + can be handled by the optimization above but not by the + following code. Hence, force them into registers here. */ if (!REG_P (cmp_op1)) - cmp_op1 = force_reg (GET_MODE (target), cmp_op1); + cmp_op1 = force_reg (target_mode, cmp_op1); if (!REG_P (cmp_op2)) - cmp_op2 = force_reg (GET_MODE (target), cmp_op2); + cmp_op2 = force_reg (target_mode, cmp_op2); s390_expand_vec_compare (result_target, cond, cmp_op1, cmp_op2); /* If the results are supposed to be either -1 or 0 we are done since this is what our compare instructions generate anyway. */ - if (constm1_operand (then, GET_MODE (then)) + if (all_ones_operand (then, GET_MODE (then)) && const0_operand (els, GET_MODE (els))) { - emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target), + emit_move_insn (target, gen_rtx_SUBREG (target_mode, result_target, 0)); return; } @@ -6103,10 +6220,10 @@ s390_expand_vcond (rtx target, rtx then, rtx els, /* This gets triggered e.g. with gcc.c-torture/compile/pr53410-1.c */ if (!REG_P (then)) - then = force_reg (GET_MODE (target), then); + then = force_reg (target_mode, then); if (!REG_P (els)) - els = force_reg (GET_MODE (target), els); + els = force_reg (target_mode, els); tmp = gen_rtx_fmt_ee (EQ, VOIDmode, result_target, @@ -6114,9 +6231,9 @@ s390_expand_vcond (rtx target, rtx then, rtx els, /* We compared the result against zero above so we have to swap then and els here. */ - tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then); + tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then); - gcc_assert (GET_MODE (target) == GET_MODE (then)); + gcc_assert (target_mode == GET_MODE (then)); emit_insn (gen_rtx_SET (target, tmp)); } @@ -6428,7 +6545,7 @@ s390_expand_atomic (machine_mode mode, enum rtx_code code, case SET: if (ac.aligned && MEM_P (val)) store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0, - 0, 0, SImode, val); + 0, 0, SImode, val, false); else { new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski, @@ -6678,6 +6795,65 @@ s390_function_num_hotpatch_hw (tree decl, } } +/* Write the current .machine and .machinemode specification to the assembler + file. 
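+   E.g. for -march=z13 -mzarch with default flags this emits
+   ".machinemode zarch" followed by ".machine "z13"".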
*/ + +#ifdef HAVE_AS_MACHINE_MACHINEMODE +static void +s390_asm_output_machine_for_arch (FILE *asm_out_file) +{ + fprintf (asm_out_file, "\t.machinemode %s\n", + (TARGET_ZARCH) ? "zarch" : "esa"); + fprintf (asm_out_file, "\t.machine \"%s", processor_table[s390_arch].name); + if (S390_USE_ARCHITECTURE_MODIFIERS) + { + int cpu_flags; + + cpu_flags = processor_flags_table[(int) s390_arch]; + if (TARGET_HTM && !(cpu_flags & PF_TX)) + fprintf (asm_out_file, "+htm"); + else if (!TARGET_HTM && (cpu_flags & PF_TX)) + fprintf (asm_out_file, "+nohtm"); + if (TARGET_VX && !(cpu_flags & PF_VX)) + fprintf (asm_out_file, "+vx"); + else if (!TARGET_VX && (cpu_flags & PF_VX)) + fprintf (asm_out_file, "+novx"); + } + fprintf (asm_out_file, "\"\n"); +} + +/* Write an extra function header before the very start of the function. */ + +void +s390_asm_output_function_prefix (FILE *asm_out_file, + const char *fnname ATTRIBUTE_UNUSED) +{ + if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL) + return; + /* Since only the function specific options are saved but not the indications + which options are set, it's too much work here to figure out which options + have actually changed. Thus, generate .machine and .machinemode whenever a + function has the target attribute or pragma. */ + fprintf (asm_out_file, "\t.machinemode push\n"); + fprintf (asm_out_file, "\t.machine push\n"); + s390_asm_output_machine_for_arch (asm_out_file); +} + +/* Write an extra function footer after the very end of the function. */ + +void +s390_asm_declare_function_size (FILE *asm_out_file, + const char *fnname, tree decl) +{ + if (!flag_inhibit_size_directive) + ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname); + if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL) + return; + fprintf (asm_out_file, "\t.machine pop\n"); + fprintf (asm_out_file, "\t.machinemode pop\n"); +} +#endif + /* Write the extra assembler code needed to declare a function properly. 
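   Besides the label itself this emits the hotpatch NOP area and, if
   TARGET_DEBUG_ARG is set, a series of "# fn:" comments recording the
   target options in effect.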
*/ void @@ -6719,6 +6895,28 @@ s390_asm_output_function_label (FILE *asm_out_file, const char *fname, ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment)); } + if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG) + { + asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch); + asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune); + asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard); + asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size); + asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost); + asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname, + s390_warn_framesize); + asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN); + asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP); + asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT); + asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM); + asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX); + asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname, + TARGET_PACKED_STACK); + asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC); + asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE); + asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR); + asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname, + s390_warn_dynamicstack_p); + } ASM_OUTPUT_LABEL (asm_out_file, fname); if (hw_after > 0) asm_fprintf (asm_out_file, @@ -7033,7 +7231,7 @@ print_operand (FILE *file, rtx x, int code) break; case MEM: - output_address (XEXP (x, 0)); + output_address (GET_MODE (x), XEXP (x, 0)); break; case CONST: @@ -7097,15 +7295,16 @@ print_operand (FILE *file, rtx x, int code) fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival); break; - case CONST_DOUBLE: - gcc_assert (GET_MODE (x) == VOIDmode); + case CONST_WIDE_INT: if (code == 'b') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + CONST_WIDE_INT_ELT (x, 0) & 0xff); else if (code == 'x') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + CONST_WIDE_INT_ELT (x, 0) & 0xffff); else if (code == 'h') fprintf (file, HOST_WIDE_INT_PRINT_DEC, - ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000); + ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000); else { if (code == 0) @@ -7119,6 +7318,11 @@ print_operand (FILE *file, rtx x, int code) case CONST_VECTOR: switch (code) { + case 'h': + gcc_assert (const_vec_duplicate_p (x)); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000); + break; case 'e': case 's': { @@ -9003,7 +9207,7 @@ s390_register_info_gprtofpr () for (i = 15; i >= 6; i--) { - if (cfun_gpr_save_slot (i) == 0) + if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE) continue; /* Advance to the next FP register which can be used as a @@ -9020,7 +9224,7 @@ s390_register_info_gprtofpr () case we ran out of FPR save slots. 
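   Resetting those slots to SAVE_SLOT_STACK puts the affected GPRs back
   into the regular stack save/restore range.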
*/ for (j = 6; j <= 15; j++) if (FP_REGNO_P (cfun_gpr_save_slot (j))) - cfun_gpr_save_slot (j) = -1; + cfun_gpr_save_slot (j) = SAVE_SLOT_STACK; break; } cfun_gpr_save_slot (i) = save_reg_slot++; @@ -9047,12 +9251,16 @@ s390_register_info_stdarg_fpr () return; min_fpr = crtl->args.info.fprs; - max_fpr = min_fpr + cfun->va_list_fpr_size; - if (max_fpr > FP_ARG_NUM_REG) - max_fpr = FP_ARG_NUM_REG; + max_fpr = min_fpr + cfun->va_list_fpr_size - 1; + if (max_fpr >= FP_ARG_NUM_REG) + max_fpr = FP_ARG_NUM_REG - 1; - for (i = min_fpr; i < max_fpr; i++) - cfun_set_fpr_save (i + FPR0_REGNUM); + /* FPR argument regs start at f0. */ + min_fpr += FPR0_REGNUM; + max_fpr += FPR0_REGNUM; + + for (i = min_fpr; i <= max_fpr; i++) + cfun_set_fpr_save (i); } /* Reserve the GPR save slots for GPRs which need to be saved due to @@ -9072,12 +9280,61 @@ s390_register_info_stdarg_gpr () return; min_gpr = crtl->args.info.gprs; - max_gpr = min_gpr + cfun->va_list_gpr_size; - if (max_gpr > GP_ARG_NUM_REG) - max_gpr = GP_ARG_NUM_REG; + max_gpr = min_gpr + cfun->va_list_gpr_size - 1; + if (max_gpr >= GP_ARG_NUM_REG) + max_gpr = GP_ARG_NUM_REG - 1; + + /* GPR argument regs start at r2. */ + min_gpr += GPR2_REGNUM; + max_gpr += GPR2_REGNUM; + + /* If r6 was supposed to be saved into an FPR and now needs to go to + the stack for vararg we have to adjust the restore range to make + sure that the restore is done from stack as well. */ + if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM)) + && min_gpr <= GPR6_REGNUM + && max_gpr >= GPR6_REGNUM) + { + if (cfun_frame_layout.first_restore_gpr == -1 + || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM) + cfun_frame_layout.first_restore_gpr = GPR6_REGNUM; + if (cfun_frame_layout.last_restore_gpr == -1 + || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM) + cfun_frame_layout.last_restore_gpr = GPR6_REGNUM; + } + + if (cfun_frame_layout.first_save_gpr == -1 + || cfun_frame_layout.first_save_gpr > min_gpr) + cfun_frame_layout.first_save_gpr = min_gpr; + + if (cfun_frame_layout.last_save_gpr == -1 + || cfun_frame_layout.last_save_gpr < max_gpr) + cfun_frame_layout.last_save_gpr = max_gpr; - for (i = min_gpr; i < max_gpr; i++) - cfun_gpr_save_slot (2 + i) = -1; + for (i = min_gpr; i <= max_gpr; i++) + cfun_gpr_save_slot (i) = SAVE_SLOT_STACK; +} + +/* Calculate the save and restore ranges for stm(g) and lm(g) in the + prologue and epilogue. */ + +static void +s390_register_info_set_ranges () +{ + int i, j; + + /* Find the first and the last save slot supposed to use the stack + to set the restore range. + Vararg regs might be marked as save to stack but only the + call-saved regs really need restoring (i.e. r6). This code + assumes that the vararg regs have not yet been recorded in + cfun_gpr_save_slot. */ + for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++); + for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--); + cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i; + cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j; + cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i; + cfun_frame_layout.last_save_gpr = (i == 16) ? 
-1 : j; } /* The GPR and FPR save slots in cfun->machine->frame_layout are set @@ -9088,7 +9345,7 @@ s390_register_info_stdarg_gpr () static void s390_register_info () { - int i, j; + int i; char clobbered_regs[32]; gcc_assert (!epilogue_completed); @@ -9121,9 +9378,13 @@ s390_register_info () cfun_frame_layout.high_fprs++; } - if (flag_pic) - clobbered_regs[PIC_OFFSET_TABLE_REGNUM] - |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM); + /* Register 12 is used for GOT address, but also as temp in prologue + for split-stack stdarg functions (unless r14 is available). */ + clobbered_regs[12] + |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) + || (flag_split_stack && cfun->stdarg + && (crtl->is_leaf || TARGET_TPF_PROFILING + || has_hard_reg_initial_val (Pmode, RETURN_REGNUM)))); clobbered_regs[BASE_REGNUM] |= (cfun->machine->base_reg @@ -9152,33 +9413,20 @@ s390_register_info () || (reload_completed && cfun_frame_layout.frame_size > 0) || cfun->calls_alloca); - memset (cfun_frame_layout.gpr_save_slots, 0, 16); + memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16); for (i = 6; i < 16; i++) if (clobbered_regs[i]) - cfun_gpr_save_slot (i) = -1; + cfun_gpr_save_slot (i) = SAVE_SLOT_STACK; s390_register_info_stdarg_fpr (); s390_register_info_gprtofpr (); - - /* First find the range of GPRs to be restored. Vararg regs don't - need to be restored so we do it before assigning slots to the - vararg GPRs. */ - for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++); - for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--); - cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i; - cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j; - + s390_register_info_set_ranges (); /* stdarg functions might need to save GPRs 2 to 6. This might - override the GPR->FPR save decision made above for r6 since - vararg regs must go to the stack. */ + override the GPR->FPR save decision made by + s390_register_info_gprtofpr for r6 since vararg regs must go to + the stack. */ s390_register_info_stdarg_gpr (); - - /* Now the range of GPRs which need saving. */ - for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++); - for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--); - cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i; - cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j; } /* This function is called by s390_optimize_prologue in order to get @@ -9189,7 +9437,7 @@ static void s390_optimize_register_info () { char clobbered_regs[32]; - int i, j; + int i; gcc_assert (epilogue_completed); gcc_assert (!cfun->machine->split_branches_pending_p); @@ -9212,23 +9460,14 @@ s390_optimize_register_info () || cfun_frame_layout.save_return_addr_p || crtl->calls_eh_return); - memset (cfun_frame_layout.gpr_save_slots, 0, 6); + memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6); for (i = 6; i < 16; i++) if (!clobbered_regs[i]) - cfun_gpr_save_slot (i) = 0; - - for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++); - for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--); - cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i; - cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j; + cfun_gpr_save_slot (i) = SAVE_SLOT_NONE; + s390_register_info_set_ranges (); s390_register_info_stdarg_gpr (); - - for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++); - for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--); - cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i; - cfun_frame_layout.last_save_gpr = (i == 16) ? 
-1 : j; } /* Fill cfun->machine with info about frame of current function. */ @@ -9391,10 +9630,17 @@ s390_init_frame_layout (void) as base register to avoid save/restore overhead. */ if (!base_used) cfun->machine->base_reg = NULL_RTX; - else if (crtl->is_leaf && !df_regs_ever_live_p (5)) - cfun->machine->base_reg = gen_rtx_REG (Pmode, 5); else - cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM); + { + int br = 0; + + if (crtl->is_leaf) + /* Prefer r5 (most likely to be free). */ + for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--) + ; + cfun->machine->base_reg = + gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM); + } s390_register_info (); s390_frame_info (); @@ -9642,7 +9888,7 @@ s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg) regrename manually about it. */ if (GENERAL_REGNO_P (new_reg) && !call_really_used_regs[new_reg] - && cfun_gpr_save_slot (new_reg) == 0) + && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE) return false; return true; @@ -9657,7 +9903,7 @@ s390_hard_regno_scratch_ok (unsigned int regno) /* See s390_hard_regno_rename_ok. */ if (GENERAL_REGNO_P (regno) && !call_really_used_regs[regno] - && cfun_gpr_save_slot (regno) == 0) + && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE) return false; return true; @@ -10238,12 +10484,15 @@ s390_emit_prologue (void) int next_fpr = 0; /* Choose best register to use for temp use within prologue. - See below for why TPF must use the register 1. */ + TPF with profiling must avoid the register 14 - the tracing function + needs the original contents of r14 to be preserved. */ if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM) && !crtl->is_leaf && !TARGET_TPF_PROFILING) temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM); + else if (flag_split_stack && cfun->stdarg) + temp_reg = gen_rtx_REG (Pmode, 12); else temp_reg = gen_rtx_REG (Pmode, 1); @@ -10673,7 +10922,7 @@ s390_emit_epilogue (bool sibcall) be in between two GPRs which need saving.) Otherwise it would be difficult to take that decision back in s390_optimize_prologue. */ - if (cfun_gpr_save_slot (RETURN_REGNUM) == -1) + if (cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK) { int return_regnum = find_unused_clobbered_reg(); if (!return_regnum) @@ -10737,6 +10986,166 @@ s300_set_up_by_prologue (hard_reg_set_container *regs) SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg)); } +/* -fsplit-stack support. */ + +/* A SYMBOL_REF for __morestack. */ +static GTY(()) rtx morestack_ref; + +/* When using -fsplit-stack, the allocation routines set a field in + the TCB to the bottom of the stack plus this much space, measured + in bytes. */ + +#define SPLIT_STACK_AVAILABLE 1024 + +/* Emit -fsplit-stack prologue, which goes before the regular function + prologue. */ + +void +s390_expand_split_stack_prologue (void) +{ + rtx r1, guard, cc = NULL; + rtx_insn *insn; + /* Offset from thread pointer to __private_ss. */ + int psso = TARGET_64BIT ? 0x38 : 0x20; + /* Pointer size in bytes. */ + /* Frame size and argument size - the two parameters to __morestack. */ + HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size; + /* Align argument size to 8 bytes - simplifies __morestack code. */ + HOST_WIDE_INT args_size = crtl->args.size >= 0 + ? ((crtl->args.size + 7) & ~7) + : 0; + /* Label to be called by __morestack. 
*/
+  rtx_code_label *call_done = NULL;
+  rtx_code_label *parm_base = NULL;
+  rtx tmp;
+
+  gcc_assert (flag_split_stack && reload_completed);
+  if (!TARGET_CPU_ZARCH)
+    {
+      sorry ("CPUs older than z900 are not supported for -fsplit-stack");
+      return;
+    }
+
+  r1 = gen_rtx_REG (Pmode, 1);
+
+  /* If no stack frame will be allocated, don't do anything.  */
+  if (!frame_size)
+    {
+      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+	{
+	  /* If va_start is used, just use r15.  */
+	  emit_move_insn (r1,
+			  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					GEN_INT (STACK_POINTER_OFFSET)));
+
+	}
+      return;
+    }
+
+  if (morestack_ref == NULL_RTX)
+    {
+      morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
+      SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
+					   | SYMBOL_FLAG_FUNCTION);
+    }
+
+  if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
+    {
+      /* If frame_size will fit in an add instruction, do a stack space
+	 check, and only call __morestack if there's not enough space.  */
+
+      /* Get thread pointer.  r1 is the only register we can always destroy - r0
+	 could contain a static chain (and cannot be used to address memory
+	 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved.  */
+      emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
+      /* Aim at __private_ss.  */
+      guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
+
+      /* If less than 1kiB used, skip addition and compare directly with
+	 __private_ss.  */
+      if (frame_size > SPLIT_STACK_AVAILABLE)
+	{
+	  emit_move_insn (r1, guard);
+	  if (TARGET_64BIT)
+	    emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
+	  else
+	    emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
+	  guard = r1;
+	}
+
+      /* Compare the (maybe adjusted) guard with the stack pointer.  */
+      cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
+    }
+
+  call_done = gen_label_rtx ();
+  parm_base = gen_label_rtx ();
+
+  /* Emit the parameter block.  */
+  tmp = gen_split_stack_data (parm_base, call_done,
+			      GEN_INT (frame_size),
+			      GEN_INT (args_size));
+  insn = emit_insn (tmp);
+  add_reg_note (insn, REG_LABEL_OPERAND, call_done);
+  LABEL_NUSES (call_done)++;
+  add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
+  LABEL_NUSES (parm_base)++;
+
+  /* %r1 = litbase.  */
+  insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
+  add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
+  LABEL_NUSES (parm_base)++;
+
+  /* Now, we need to call __morestack.  It has very special calling
+     conventions: it preserves param/return/static chain registers for
+     calling main function body, and looks for its own parameters at %r1.  */
+
+  if (cc != NULL)
+    {
+      tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
+
+      insn = emit_jump_insn (tmp);
+      JUMP_LABEL (insn) = call_done;
+      LABEL_NUSES (call_done)++;
+
+      /* Mark the jump as very unlikely to be taken.  */
+      add_int_reg_note (insn, REG_BR_PROB, REG_BR_PROB_BASE / 100);
+
+      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+	{
+	  /* If va_start is used, and __morestack was not called, just use
+	     r15.  */
+	  emit_move_insn (r1,
+			  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+					GEN_INT (STACK_POINTER_OFFSET)));
+	}
+    }
+  else
+    {
+      tmp = gen_split_stack_call (morestack_ref, call_done);
+      insn = emit_jump_insn (tmp);
+      JUMP_LABEL (insn) = call_done;
+      LABEL_NUSES (call_done)++;
+      emit_barrier ();
+    }
+
+  /* __morestack will call us here.  */
+
+  emit_label (call_done);
+}
+
+/* We may have to tell the dataflow pass that the split stack prologue
+   is initializing a register.
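+   Specifically, r1 carries the address of the incoming stack arguments
+   into stdarg functions (see s390_va_start) and must therefore be
+   considered live on entry.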
*/ + +static void +s390_live_on_entry (bitmap regs) +{ + if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) + { + gcc_assert (flag_split_stack); + bitmap_set_bit (regs, 1); + } +} + /* Return true if the function can use simple_return to return outside of a shrink-wrapped region. At present shrink-wrapping is supported in all cases. */ @@ -10767,7 +11176,7 @@ s390_can_use_return_insn (void) return false; for (i = 0; i < 16; i++) - if (cfun_gpr_save_slot (i)) + if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE) return false; /* For 31 bit this is not covered by the frame_size check below @@ -11339,6 +11748,27 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); } + if (flag_split_stack + && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl)) + == NULL) + && cfun->machine->split_stack_varargs_pointer == NULL_RTX) + { + rtx reg; + rtx_insn *seq; + + reg = gen_reg_rtx (Pmode); + cfun->machine->split_stack_varargs_pointer = reg; + + start_sequence (); + emit_move_insn (reg, gen_rtx_REG (Pmode, 1)); + seq = get_insns (); + end_sequence (); + + push_topmost_sequence (); + emit_insn_after (seq, entry_of_function ()); + pop_topmost_sequence (); + } + /* Find the overflow area. FIXME: This currently is too pessimistic when the vector ABI is enabled. In that case we *always* set up the overflow area @@ -11347,7 +11777,10 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG || TARGET_VX_ABI) { - t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx); + if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) + t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx); + else + t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer); off = INTVAL (crtl->args.arg_offset_rtx); off = off < 0 ? 0 : off; @@ -11772,6 +12205,13 @@ s390_function_profiler (FILE *file, int labelno) output_asm_insn ("brasl\t%0,%4", op); output_asm_insn ("lg\t%0,%1", op); } + else if (TARGET_CPU_ZARCH) + { + output_asm_insn ("st\t%0,%1", op); + output_asm_insn ("larl\t%2,%3", op); + output_asm_insn ("brasl\t%0,%4", op); + output_asm_insn ("l\t%0,%1", op); + } else if (!flag_pic) { op[6] = gen_label_rtx (); @@ -11815,29 +12255,39 @@ s390_encode_section_info (tree decl, rtx rtl, int first) if (TREE_CODE (decl) == VAR_DECL) { - /* If a variable has a forced alignment to < 2 bytes, mark it - with SYMBOL_FLAG_ALIGN1 to prevent it from being used as LARL - operand. */ - if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16) - SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1; - if (!DECL_SIZE (decl) - || !DECL_ALIGN (decl) - || !tree_fits_shwi_p (DECL_SIZE (decl)) - || (DECL_ALIGN (decl) <= 64 - && DECL_ALIGN (decl) != tree_to_shwi (DECL_SIZE (decl)))) - SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED; + /* Store the alignment to be able to check if we can use + a larl/load-relative instruction. We only handle the cases + that can go wrong (i.e. no FUNC_DECLs). If a symref does + not have any flag we assume it to be correctly aligned. */ + + if (DECL_ALIGN (decl) % 64) + SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0)); + + if (DECL_ALIGN (decl) % 32) + SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0)); + + if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16) + SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0)); } /* Literal pool references don't have a decl so they are handled differently here. 
We rely on the information in the MEM_ALIGN
-     entry to decide upon natural alignment. */
+     entry to decide upon the alignment. */
   if (MEM_P (rtl)
       && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
       && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
-      && (MEM_ALIGN (rtl) == 0
-	  || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
-	  || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
-    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
+      && MEM_ALIGN (rtl) != 0
+      && GET_MODE_BITSIZE (GET_MODE (rtl)) != 0)
+    {
+      if (MEM_ALIGN (rtl) % 64)
+	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
+
+      if (MEM_ALIGN (rtl) % 32)
+	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
+
+      if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
+	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
+    }
 }
 
 /* Output thunk to FILE that implements a C++ virtual function call (with
@@ -12258,7 +12708,7 @@ s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
      replace the symbol itself with the PLT stub.  */
   if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
     {
-      if (retaddr_reg != NULL_RTX)
+      if (TARGET_64BIT || retaddr_reg != NULL_RTX)
 	{
 	  addr_location = gen_rtx_UNSPEC (Pmode,
 					  gen_rtvec (1, addr_location),
@@ -12465,9 +12915,9 @@ s390_optimize_prologue (void)
 
 	  /* It must not happen that what we once saved in an FPR now
 	     needs a stack slot.  */
-	  gcc_assert (cfun_gpr_save_slot (gpr_regno) != -1);
+	  gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
 
-	  if (cfun_gpr_save_slot (gpr_regno) == 0)
+	  if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
 	    {
 	      remove_insn (insn);
 	      continue;
@@ -13358,7 +13808,7 @@ s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
    The loop is analyzed for memory accesses by calling check_dpu for
    each rtx of the loop. Depending on the loop_depth and the amount of
    memory accesses a new number <=nunroll is returned to improve the
-   behaviour of the hardware prefetch unit. */
+   behavior of the hardware prefetch unit. */
 static unsigned
 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
 {
@@ -13396,230 +13846,285 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
     }
 }
 
+/* Restore the current options.  This is a hook function and also called
+   internally. */
+
 static void
-s390_option_override (void)
+s390_function_specific_restore (struct gcc_options *opts,
+				struct cl_target_option *ptr ATTRIBUTE_UNUSED)
 {
-  unsigned int i;
-  cl_deferred_option *opt;
-  vec<cl_deferred_option> *v =
-    (vec<cl_deferred_option> *) s390_deferred_options;
+  opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
+}
 
-  if (v)
-    FOR_EACH_VEC_ELT (*v, i, opt)
-      {
-	switch (opt->opt_index)
-	  {
-	  case OPT_mhotpatch_:
-	    {
-	      int val1;
-	      int val2;
-	      char s[256];
-	      char *t;
+static void
+s390_option_override_internal (bool main_args_p,
+			       struct gcc_options *opts,
+			       const struct gcc_options *opts_set)
+{
+  const char *prefix;
+  const char *suffix;
 
-	      strncpy (s, opt->arg, 256);
-	      s[255] = 0;
-	      t = strchr (s, ',');
-	      if (t != NULL)
-		{
-		  *t = 0;
-		  t++;
-		  val1 = integral_argument (s);
-		  val2 = integral_argument (t);
-		}
-	      else
-		{
-		  val1 = -1;
-		  val2 = -1;
-		}
-	      if (val1 == -1 || val2 == -1)
-		{
-		  /* argument is not a plain number */
-		  error ("arguments to %qs should be non-negative integers",
-			 "-mhotpatch=n,m");
-		  break;
-		}
-	      else if (val1 > s390_hotpatch_hw_max
-		       || val2 > s390_hotpatch_hw_max)
-		{
-		  error ("argument to %qs is too large (max. 
%d)", - "-mhotpatch=n,m", s390_hotpatch_hw_max); - break; - } - s390_hotpatch_hw_before_label = val1; - s390_hotpatch_hw_after_label = val2; - break; - } - default: - gcc_unreachable (); - } - } + /* Set up prefix/suffix so the error messages refer to either the command + line argument, or the attribute(target). */ + if (main_args_p) + { + prefix = "-m"; + suffix = ""; + } + else + { + prefix = "option(\""; + suffix = "\")"; + } - /* Set up function hooks. */ - init_machine_status = s390_init_machine_status; /* Architecture mode defaults according to ABI. */ - if (!(target_flags_explicit & MASK_ZARCH)) + if (!(opts_set->x_target_flags & MASK_ZARCH)) { if (TARGET_64BIT) - target_flags |= MASK_ZARCH; + opts->x_target_flags |= MASK_ZARCH; else - target_flags &= ~MASK_ZARCH; + opts->x_target_flags &= ~MASK_ZARCH; } - /* Set the march default in case it hasn't been specified on - cmdline. */ - if (s390_arch == PROCESSOR_max) - { - s390_arch_string = TARGET_ZARCH? "z900" : "g5"; - s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5; - s390_arch_flags = processor_flags_table[(int)s390_arch]; - } + /* Set the march default in case it hasn't been specified on cmdline. */ + if (!opts_set->x_s390_arch) + opts->x_s390_arch = PROCESSOR_2064_Z900; + else if (opts->x_s390_arch == PROCESSOR_9672_G5 + || opts->x_s390_arch == PROCESSOR_9672_G6) + warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed " + "in future releases; use at least %sarch=z900%s", + prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6", + suffix, prefix, suffix); + + opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch]; /* Determine processor to tune for. */ - if (s390_tune == PROCESSOR_max) - { - s390_tune = s390_arch; - s390_tune_flags = s390_arch_flags; - } + if (!opts_set->x_s390_tune) + opts->x_s390_tune = opts->x_s390_arch; + else if (opts->x_s390_tune == PROCESSOR_9672_G5 + || opts->x_s390_tune == PROCESSOR_9672_G6) + warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed " + "in future releases; use at least %stune=z900%s", + prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6", + suffix, prefix, suffix); + + opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune]; /* Sanity checks. */ - if (s390_arch == PROCESSOR_NATIVE || s390_tune == PROCESSOR_NATIVE) + if (opts->x_s390_arch == PROCESSOR_NATIVE + || opts->x_s390_tune == PROCESSOR_NATIVE) gcc_unreachable (); - if (TARGET_ZARCH && !TARGET_CPU_ZARCH) - error ("z/Architecture mode not supported on %s", s390_arch_string); - if (TARGET_64BIT && !TARGET_ZARCH) + if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts)) + error ("z/Architecture mode not supported on %s", + processor_table[(int)opts->x_s390_arch].name); + if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags)) error ("64-bit ABI not supported in ESA/390 mode"); - /* Use hardware DFP if available and not explicitly disabled by - user. E.g. with -m31 -march=z10 -mzarch */ - if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP) - target_flags |= MASK_HARD_DFP; - /* Enable hardware transactions if available and not explicitly disabled by user. E.g. 
with -m31 -march=zEC12 -mzarch */ - if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH) - target_flags |= MASK_OPT_HTM; + if (!TARGET_OPT_HTM_P (opts_set->x_target_flags)) + { + if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags)) + opts->x_target_flags |= MASK_OPT_HTM; + else + opts->x_target_flags &= ~MASK_OPT_HTM; + } - if (target_flags_explicit & MASK_OPT_VX) + if (TARGET_OPT_VX_P (opts_set->x_target_flags)) { - if (TARGET_OPT_VX) + if (TARGET_OPT_VX_P (opts->x_target_flags)) { - if (!TARGET_CPU_VX) + if (!TARGET_CPU_VX_P (opts)) error ("hardware vector support not available on %s", - s390_arch_string); - if (TARGET_SOFT_FLOAT) + processor_table[(int)opts->x_s390_arch].name); + if (TARGET_SOFT_FLOAT_P (opts->x_target_flags)) error ("hardware vector support not available with -msoft-float"); } } - else if (TARGET_CPU_VX) - /* Enable vector support if available and not explicitly disabled - by user. E.g. with -m31 -march=z13 -mzarch */ - target_flags |= MASK_OPT_VX; + else + { + if (TARGET_CPU_VX_P (opts)) + /* Enable vector support if available and not explicitly disabled + by user. E.g. with -m31 -march=z13 -mzarch */ + opts->x_target_flags |= MASK_OPT_VX; + else + opts->x_target_flags &= ~MASK_OPT_VX; + } - if (TARGET_HARD_DFP && !TARGET_DFP) + /* Use hardware DFP if available and not explicitly disabled by + user. E.g. with -m31 -march=z10 -mzarch */ + if (!TARGET_HARD_DFP_P (opts_set->x_target_flags)) + { + if (TARGET_DFP_P (opts)) + opts->x_target_flags |= MASK_HARD_DFP; + else + opts->x_target_flags &= ~MASK_HARD_DFP; + } + + if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts)) { - if (target_flags_explicit & MASK_HARD_DFP) + if (TARGET_HARD_DFP_P (opts_set->x_target_flags)) { - if (!TARGET_CPU_DFP) + if (!TARGET_CPU_DFP_P (opts)) error ("hardware decimal floating point instructions" - " not available on %s", s390_arch_string); - if (!TARGET_ZARCH) + " not available on %s", + processor_table[(int)opts->x_s390_arch].name); + if (!TARGET_ZARCH_P (opts->x_target_flags)) error ("hardware decimal floating point instructions" " not available in ESA/390 mode"); } else - target_flags &= ~MASK_HARD_DFP; + opts->x_target_flags &= ~MASK_HARD_DFP; } - if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT) + if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags) + && TARGET_SOFT_FLOAT_P (opts->x_target_flags)) { - if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP) + if (TARGET_HARD_DFP_P (opts_set->x_target_flags) + && TARGET_HARD_DFP_P (opts->x_target_flags)) error ("-mhard-dfp can%'t be used in conjunction with -msoft-float"); - target_flags &= ~MASK_HARD_DFP; - } - - /* Set processor cost function. 
*/ - switch (s390_tune) - { - case PROCESSOR_2084_Z990: - s390_cost = &z990_cost; - break; - case PROCESSOR_2094_Z9_109: - case PROCESSOR_2094_Z9_EC: - s390_cost = &z9_109_cost; - break; - case PROCESSOR_2097_Z10: - s390_cost = &z10_cost; - break; - case PROCESSOR_2817_Z196: - s390_cost = &z196_cost; - break; - case PROCESSOR_2827_ZEC12: - case PROCESSOR_2964_Z13: - s390_cost = &zEC12_cost; - break; - default: - s390_cost = &z900_cost; + opts->x_target_flags &= ~MASK_HARD_DFP; } - if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT) + if (TARGET_BACKCHAIN_P (opts->x_target_flags) + && TARGET_PACKED_STACK_P (opts->x_target_flags) + && TARGET_HARD_FLOAT_P (opts->x_target_flags)) error ("-mbackchain -mpacked-stack -mhard-float are not supported " "in combination"); - if (s390_stack_size) + if (opts->x_s390_stack_size) { - if (s390_stack_guard >= s390_stack_size) + if (opts->x_s390_stack_guard >= opts->x_s390_stack_size) error ("stack size must be greater than the stack guard value"); - else if (s390_stack_size > 1 << 16) + else if (opts->x_s390_stack_size > 1 << 16) error ("stack size must not be greater than 64k"); } - else if (s390_stack_guard) + else if (opts->x_s390_stack_guard) error ("-mstack-guard implies use of -mstack-size"); #ifdef TARGET_DEFAULT_LONG_DOUBLE_128 - if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) - target_flags |= MASK_LONG_DOUBLE_128; + if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags)) + opts->x_target_flags |= MASK_LONG_DOUBLE_128; #endif - if (s390_tune >= PROCESSOR_2097_Z10) + if (opts->x_s390_tune >= PROCESSOR_2097_Z10) { maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100, - global_options.x_param_values, - global_options_set.x_param_values); + opts->x_param_values, + opts_set->x_param_values); maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32, - global_options.x_param_values, - global_options_set.x_param_values); + opts->x_param_values, + opts_set->x_param_values); maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000, - global_options.x_param_values, - global_options_set.x_param_values); + opts->x_param_values, + opts_set->x_param_values); maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64, - global_options.x_param_values, - global_options_set.x_param_values); + opts->x_param_values, + opts_set->x_param_values); } maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256, - global_options.x_param_values, - global_options_set.x_param_values); + opts->x_param_values, + opts_set->x_param_values); /* values for loop prefetching */ maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256, - global_options.x_param_values, - global_options_set.x_param_values); + opts->x_param_values, + opts_set->x_param_values); maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128, - global_options.x_param_values, - global_options_set.x_param_values); + opts->x_param_values, + opts_set->x_param_values); /* s390 has more than 2 levels and the size is much larger. Since we are always running virtualized assume that we only get a small part of the caches above l1. 
*/
   maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
-			 global_options.x_param_values,
-			 global_options_set.x_param_values);
+			 opts->x_param_values,
+			 opts_set->x_param_values);
   maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
-			 global_options.x_param_values,
-			 global_options_set.x_param_values);
+			 opts->x_param_values,
+			 opts_set->x_param_values);
   maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
-			 global_options.x_param_values,
-			 global_options_set.x_param_values);
+			 opts->x_param_values,
+			 opts_set->x_param_values);
+
+  /* Use the alternative scheduling-pressure algorithm by default.  */
+  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
+			 opts->x_param_values,
+			 opts_set->x_param_values);
+
+  /* Call target specific restore function to do post-init work.  At the moment,
+     this just sets opts->x_s390_cost_pointer.  */
+  s390_function_specific_restore (opts, NULL);
+}
+
+static void
+s390_option_override (void)
+{
+  unsigned int i;
+  cl_deferred_option *opt;
+  vec<cl_deferred_option> *v =
+    (vec<cl_deferred_option> *) s390_deferred_options;
+
+  if (v)
+    FOR_EACH_VEC_ELT (*v, i, opt)
+      {
+	switch (opt->opt_index)
+	  {
+	  case OPT_mhotpatch_:
+	    {
+	      int val1;
+	      int val2;
+	      char s[256];
+	      char *t;
+
+	      strncpy (s, opt->arg, 256);
+	      s[255] = 0;
+	      t = strchr (s, ',');
+	      if (t != NULL)
+		{
+		  *t = 0;
+		  t++;
+		  val1 = integral_argument (s);
+		  val2 = integral_argument (t);
+		}
+	      else
+		{
+		  val1 = -1;
+		  val2 = -1;
+		}
+	      if (val1 == -1 || val2 == -1)
+		{
+		  /* argument is not a plain number */
+		  error ("arguments to %qs should be non-negative integers",
+			 "-mhotpatch=n,m");
+		  break;
+		}
+	      else if (val1 > s390_hotpatch_hw_max
+		       || val2 > s390_hotpatch_hw_max)
+		{
+		  error ("argument to %qs is too large (max. %d)",
+			 "-mhotpatch=n,m", s390_hotpatch_hw_max);
+		  break;
+		}
+	      s390_hotpatch_hw_before_label = val1;
+	      s390_hotpatch_hw_after_label = val2;
+	      break;
+	    }
+	  default:
+	    gcc_unreachable ();
+	  }
+      }
+
+  /* Set up function hooks.  */
+  init_machine_status = s390_init_machine_status;
+
+  s390_option_override_internal (true, &global_options, &global_options_set);
+
+  /* Save the initial options in case the user does function specific
+     options.  */
+  target_option_default_node = build_target_option_node (&global_options);
+  target_option_current_node = target_option_default_node;
 
   /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
      requires the arch flags to be evaluated already.  Since prefetching
@@ -13627,11 +14132,6 @@ s390_option_override (void)
   if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
     flag_prefetch_loop_arrays = 1;
 
-  /* Use the alternative scheduling-pressure algorithm by default.  */
-  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
-			 global_options.x_param_values,
-			 global_options_set.x_param_values);
-
   if (TARGET_TPF)
     {
       /* Don't emit DWARF3/4 unless specifically selected.  The TPF
@@ -13658,6 +14158,386 @@ s390_option_override (void)
   register_pass (&insert_pass_s390_early_mach);
 }
 
+#if S390_USE_TARGET_ATTRIBUTE
+/* Inner function to process the attribute((target(...))), take an argument and
+   set the current options from the argument.  If we have a list, recursively go
+   over the list.
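+   Returns false if one of the options could not be applied; OPTS and
+   NEW_OPTS_SET are updated in place.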
*/ + +static bool +s390_valid_target_attribute_inner_p (tree args, + struct gcc_options *opts, + struct gcc_options *new_opts_set, + bool force_pragma) +{ + char *next_optstr; + bool ret = true; + +#define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 } +#define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 } + static const struct + { + const char *string; + size_t len; + int opt; + int has_arg; + int only_as_pragma; + } attrs[] = { + /* enum options */ + S390_ATTRIB ("arch=", OPT_march_, 1), + S390_ATTRIB ("tune=", OPT_mtune_, 1), + /* uinteger options */ + S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1), + S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1), + S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1), + S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1), + /* flag options */ + S390_ATTRIB ("backchain", OPT_mbackchain, 0), + S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0), + S390_ATTRIB ("hard-float", OPT_mhard_float, 0), + S390_ATTRIB ("htm", OPT_mhtm, 0), + S390_ATTRIB ("vx", OPT_mvx, 0), + S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0), + S390_ATTRIB ("small-exec", OPT_msmall_exec, 0), + S390_ATTRIB ("soft-float", OPT_msoft_float, 0), + S390_ATTRIB ("mvcle", OPT_mmvcle, 0), + S390_PRAGMA ("zvector", OPT_mzvector, 0), + /* boolean options */ + S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0), + }; +#undef S390_ATTRIB +#undef S390_PRAGMA + + /* If this is a list, recurse to get the options. */ + if (TREE_CODE (args) == TREE_LIST) + { + bool ret = true; + int num_pragma_values; + int i; + + /* Note: attribs.c:decl_attributes prepends the values from + current_target_pragma to the list of target attributes. To determine + whether we're looking at a value of the attribute or the pragma we + assume that the first [list_length (current_target_pragma)] values in + the list are the values from the pragma. */ + num_pragma_values = (!force_pragma && current_target_pragma != NULL) + ? list_length (current_target_pragma) : 0; + for (i = 0; args; args = TREE_CHAIN (args), i++) + { + bool is_pragma; + + is_pragma = (force_pragma || i < num_pragma_values); + if (TREE_VALUE (args) + && !s390_valid_target_attribute_inner_p (TREE_VALUE (args), + opts, new_opts_set, + is_pragma)) + { + ret = false; + } + } + return ret; + } + + else if (TREE_CODE (args) != STRING_CST) + { + error ("attribute % argument not a string"); + return false; + } + + /* Handle multiple arguments separated by commas. */ + next_optstr = ASTRDUP (TREE_STRING_POINTER (args)); + + while (next_optstr && *next_optstr != '\0') + { + char *p = next_optstr; + char *orig_p = p; + char *comma = strchr (next_optstr, ','); + size_t len, opt_len; + int opt; + bool opt_set_p; + char ch; + unsigned i; + int mask = 0; + enum cl_var_type var_type; + bool found; + + if (comma) + { + *comma = '\0'; + len = comma - next_optstr; + next_optstr = comma + 1; + } + else + { + len = strlen (p); + next_optstr = NULL; + } + + /* Recognize no-xxx. */ + if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') + { + opt_set_p = false; + p += 3; + len -= 3; + } + else + opt_set_p = true; + + /* Find the option. */ + ch = *p; + found = false; + for (i = 0; i < ARRAY_SIZE (attrs); i++) + { + opt_len = attrs[i].len; + if (ch == attrs[i].string[0] + && ((attrs[i].has_arg) ? 
+
+/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
+
+tree
+s390_valid_target_attribute_tree (tree args,
+                                  struct gcc_options *opts,
+                                  const struct gcc_options *opts_set,
+                                  bool force_pragma)
+{
+  tree t = NULL_TREE;
+  struct gcc_options new_opts_set;
+
+  memset (&new_opts_set, 0, sizeof (new_opts_set));
+
+  /* Process each of the options on the chain.  */
+  if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
+                                             force_pragma))
+    return error_mark_node;
+
+  /* If some option was set (even if it has not changed), rerun
+     s390_option_override_internal, and then save the options away.  */
+  if (new_opts_set.x_target_flags
+      || new_opts_set.x_s390_arch
+      || new_opts_set.x_s390_tune
+      || new_opts_set.x_s390_stack_guard
+      || new_opts_set.x_s390_stack_size
+      || new_opts_set.x_s390_branch_cost
+      || new_opts_set.x_s390_warn_framesize
+      || new_opts_set.x_s390_warn_dynamicstack_p)
+    {
+      const unsigned char *src = (const unsigned char *)opts_set;
+      unsigned char *dest = (unsigned char *)&new_opts_set;
+      unsigned int i;
+
+      /* Merge the original option flags into the new ones.  */
+      for (i = 0; i < sizeof(*opts_set); i++)
+        dest[i] |= src[i];
+
+      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
+      s390_option_override_internal (false, opts, &new_opts_set);
+      /* Save the current options unless we are validating options for
+         #pragma.  */
+      t = build_target_option_node (opts);
+    }
+  return t;
+}
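/* A sketch (editorial illustration, not part of this patch) of the
   byte-wise merge used above: treating two option-set images as raw
   bytes and ORing them keeps every flag that was recorded as explicitly
   set in either image, which is sound because each slot is an integer
   flag that is zero when unset.  opts_image and its field names are
   hypothetical.  */

#include <stdio.h>
#include <string.h>

struct opts_image { unsigned char htm_set; unsigned char vx_set; };

int
main (void)
{
  struct opts_image cmdline = { 1, 0 };  /* e.g. -mhtm on the command line */
  struct opts_image attrib  = { 0, 1 };  /* e.g. vx via target attribute */
  const unsigned char *src = (const unsigned char *) &cmdline;
  unsigned char *dest = (unsigned char *) &attrib;

  /* Byte-wise OR, as in the merge loop above.  */
  for (size_t i = 0; i < sizeof (struct opts_image); i++)
    dest[i] |= src[i];

  printf ("htm_set=%d vx_set=%d\n", attrib.htm_set, attrib.vx_set);
  return 0;
}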
+
+/* Hook to validate attribute((target("string"))).  */
+
+static bool
+s390_valid_target_attribute_p (tree fndecl,
+                               tree ARG_UNUSED (name),
+                               tree args,
+                               int ARG_UNUSED (flags))
+{
+  struct gcc_options func_options;
+  tree new_target, new_optimize;
+  bool ret = true;
+
+  /* attribute((target("default"))) does nothing, beyond
+     affecting multi-versioning.  */
+  if (TREE_VALUE (args)
+      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
+      && TREE_CHAIN (args) == NULL_TREE
+      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
+    return true;
+
+  tree old_optimize = build_optimization_node (&global_options);
+
+  /* Get the optimization options of the current function.  */
+  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
+
+  if (!func_optimize)
+    func_optimize = old_optimize;
+
+  /* Init func_options.  */
+  memset (&func_options, 0, sizeof (func_options));
+  init_options_struct (&func_options, NULL);
+  lang_hooks.init_options_struct (&func_options);
+
+  cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
+
+  /* Initialize func_options to the default before its target options can
+     be set.  */
+  cl_target_option_restore (&func_options,
+                            TREE_TARGET_OPTION (target_option_default_node));
+
+  new_target = s390_valid_target_attribute_tree (args, &func_options,
+                                                 &global_options_set,
+                                                 (args ==
+                                                  current_target_pragma));
+  new_optimize = build_optimization_node (&func_options);
+  if (new_target == error_mark_node)
+    ret = false;
+  else if (fndecl && new_target)
+    {
+      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
+      if (old_optimize != new_optimize)
+        DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
+    }
+  return ret;
+}
+
+/* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
+   cache.  */
+
+void
+s390_activate_target_options (tree new_tree)
+{
+  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
+  if (TREE_TARGET_GLOBALS (new_tree))
+    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
+  else if (new_tree == target_option_default_node)
+    restore_target_globals (&default_target_globals);
+  else
+    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
+  s390_previous_fndecl = NULL_TREE;
+}
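/* A sketch (editorial illustration, not part of this patch) of the
   memoization pattern that s390_set_current_function below relies on via
   s390_previous_fndecl: remember the last key and skip the expensive
   context switch when it has not changed.  switch_context and last_fn
   are hypothetical names.  */

#include <stdio.h>

static const char *last_fn;

static void
switch_context (const char *fn)
{
  if (fn == last_fn)
    return;  /* same function as last time: nothing to do */
  printf ("reconfiguring target state for %s\n", fn ? fn : "<toplevel>");
  last_fn = fn;
}

int
main (void)
{
  const char *f = "foo";
  switch_context (f);
  switch_context (f);      /* cached: prints nothing */
  switch_context ("bar");  /* new key: reconfigures */
  return 0;
}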
+
+/* Establish appropriate back-end context for processing the function
+   FNDECL.  The argument might be NULL to indicate processing at top
+   level, outside of any function scope.  */
+static void
+s390_set_current_function (tree fndecl)
+{
+  /* Only change the context if the function changes.  This hook is called
+     several times in the course of compiling a function, and we don't want to
+     slow things down too much or call target_reinit when it isn't safe.  */
+  if (fndecl == s390_previous_fndecl)
+    return;
+
+  tree old_tree;
+  if (s390_previous_fndecl == NULL_TREE)
+    old_tree = target_option_current_node;
+  else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
+    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
+  else
+    old_tree = target_option_default_node;
+
+  if (fndecl == NULL_TREE)
+    {
+      if (old_tree != target_option_current_node)
+        s390_activate_target_options (target_option_current_node);
+      return;
+    }
+
+  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
+  if (new_tree == NULL_TREE)
+    new_tree = target_option_default_node;
+
+  if (old_tree != new_tree)
+    s390_activate_target_options (new_tree);
+  s390_previous_fndecl = fndecl;
+}
+#endif
+
 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
 
 static bool
@@ -13796,6 +14676,15 @@ s390_vector_alignment (const_tree type)
   return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
 }
 
+#ifdef HAVE_AS_MACHINE_MACHINEMODE
+/* Implement TARGET_ASM_FILE_START.  */
+static void
+s390_asm_file_start (void)
+{
+  s390_asm_output_machine_for_arch (asm_out_file);
+}
+#endif
+
 /* Implement TARGET_ASM_FILE_END.  */
 static void
 s390_asm_file_end (void)
@@ -13818,6 +14707,9 @@ s390_asm_file_end (void)
 	     s390_vector_abi);
 #endif
   file_end_indicate_exec_stack ();
+
+  if (flag_split_stack)
+    file_end_indicate_split_stack ();
 }
 
 /* Return true if TYPE is a vector bool type.  */
@@ -14073,6 +14965,9 @@ s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree ty
 #undef TARGET_SET_UP_BY_PROLOGUE
 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
 
+#undef TARGET_EXTRA_LIVE_ON_ENTRY
+#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
+
 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
   s390_use_by_pieces_infrastructure_p
@@ -14095,9 +14990,25 @@ s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree ty
 #undef TARGET_INVALID_BINARY_OP
 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
 
+#ifdef HAVE_AS_MACHINE_MACHINEMODE
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START s390_asm_file_start
+#endif
+
 #undef TARGET_ASM_FILE_END
 #define TARGET_ASM_FILE_END s390_asm_file_end
 
+#if S390_USE_TARGET_ATTRIBUTE
+#undef TARGET_SET_CURRENT_FUNCTION
+#define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
+
+#undef TARGET_OPTION_VALID_ATTRIBUTE_P
+#define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
+#endif
+
+#undef TARGET_OPTION_RESTORE
+#define TARGET_OPTION_RESTORE s390_function_specific_restore
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-s390.h"
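/* An illustrative usage sketch (editorial, not part of this patch): with
   the hooks wired up above, per-function target options become available
   to user code roughly as below.  The function names are placeholders,
   and whether "arch=z13", "vx", or "no-htm" is accepted depends on the
   configured assembler and architecture level.  */

__attribute__ ((target ("arch=z13,vx")))
static int
vector_path (int x)
{
  /* Compiled as if -march=z13 -mvx were given for this function only.  */
  return x * 2;
}

#pragma GCC target ("no-htm")
static int
no_tx_path (int x)
{
  /* Functions after the pragma are compiled with -mno-htm.  */
  return x + 1;
}

int
main (void)
{
  return vector_path (1) + no_tx_path (2);
}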