{
rtx save_area, mem;
rtx label;
- rtx label_ref;
rtx tmp_reg;
rtx nsse_reg;
alias_set_type set;
SSE saves. We need some preparation work to get this working. */
label = gen_label_rtx ();
- label_ref = gen_rtx_LABEL_REF (Pmode, label);
- /* Compute address to jump to :
- label - eax*4 + nnamed_sse_arguments*4 Or
- label - eax*5 + nnamed_sse_arguments*5 for AVX. */
- tmp_reg = gen_reg_rtx (Pmode);
nsse_reg = gen_reg_rtx (Pmode);
emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
- emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
- gen_rtx_MULT (Pmode, nsse_reg,
- GEN_INT (4))));
-
- /* vmovaps is one byte longer than movaps. */
- if (TARGET_AVX)
- emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
- gen_rtx_PLUS (Pmode, tmp_reg,
- nsse_reg)));
-
- if (cum->sse_regno)
- emit_move_insn
- (nsse_reg,
- gen_rtx_CONST (DImode,
- gen_rtx_PLUS (DImode,
- label_ref,
- GEN_INT (cum->sse_regno
- * (TARGET_AVX ? 5 : 4)))));
- else
- emit_move_insn (nsse_reg, label_ref);
- emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
/* Compute address of memory block we save into. We always use pointer
pointing 127 bytes after first byte to store - this is needed to keep
mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
- set_mem_align (mem, BITS_PER_WORD);
+ set_mem_align (mem, 64);
/* And finally do the dirty job! */
emit_insn (gen_sse_prologue_save (mem, nsse_reg,
- GEN_INT (cum->sse_regno), label));
+ GEN_INT (cum->sse_regno), label,
+ gen_reg_rtx (Pmode)));
}
}
int indirect_p = 0;
tree ptrtype;
enum machine_mode nat_mode;
- int arg_boundary;
+ unsigned int arg_boundary;
/* Only 64bit target needs something special. */
if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
size_int (-align));
t = fold_convert (TREE_TYPE (ovf), t);
+ if (crtl->stack_alignment_needed < arg_boundary)
+ crtl->stack_alignment_needed = arg_boundary;
}
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
gimplify_assign (addr, t, pre_p);
}
/* x86-64 ABI requires arrays greater than 16 bytes to be aligned
- to 16byte boundary. */
- if (TARGET_64BIT)
+ to 16byte boundary. Exact wording is:
+
+ An array uses the same alignment as its elements, except that a local or
+ global array variable of length at least 16 bytes or
+ a C99 variable-length array variable always has alignment of at least 16 bytes.
+
+ This was added to allow use of aligned SSE instructions at arrays. This
+ rule is meant for static storage (where compiler can not do the analysis
+ by itself). We follow it for automatic variables only when convenient.
+ We fully control everything in the function compiled, and functions from
+ other units cannot rely on the alignment.
+
+ Exclude va_list type.  It is the common case of a local array where
+ we cannot benefit from the alignment.
+ if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
+ && TARGET_SSE)
{
if (AGGREGATE_TYPE_P (type)
+ && (TYPE_MAIN_VARIANT (type)
+ != TYPE_MAIN_VARIANT (va_list_type_node))
&& TYPE_SIZE (type)
&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
&& (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
(UNSPEC_SET_RIP 16)
(UNSPEC_SET_GOT_OFFSET 17)
(UNSPEC_MEMORY_BLOCKAGE 18)
+ (UNSPEC_SSE_PROLOGUE_SAVE_LOW 19)
; TLS support
(UNSPEC_TP 20)
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
- (use (match_operand:DI 1 "register_operand" ""))
+ (clobber (match_operand:DI 1 "register_operand" ""))
(use (match_operand:DI 2 "immediate_operand" ""))
- (use (label_ref:DI (match_operand 3 "" "")))])]
+ (use (label_ref:DI (match_operand 3 "" "")))
+ (clobber (match_operand:DI 4 "register_operand" ""))
+ (use (match_dup 1))])]
"TARGET_64BIT"
"")
-(define_insn "*sse_prologue_save_insn"
+;; Pre-reload version of prologue save.  Until after prologue generation we don't know
+;; what the size of the save instruction will be.
+;; Operand 0+operand 6 is the memory save area
+;; Operand 1 is number of registers to save (will get overwritten to operand 5)
+;; Operand 2 is number of non-vaargs SSE arguments
+;; Operand 3 is label starting the save block
+;; Operand 4 is used for temporary computation of jump address
+(define_insn "*sse_prologue_save_insn1"
[(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
- (match_operand:DI 4 "const_int_operand" "n")))
+ (match_operand:DI 6 "const_int_operand" "n")))
(unspec:BLK [(reg:DI XMM0_REG)
(reg:DI XMM1_REG)
(reg:DI XMM2_REG)
(reg:DI XMM5_REG)
(reg:DI XMM6_REG)
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
+ (clobber (match_operand:DI 1 "register_operand" "=r"))
+ (use (match_operand:DI 2 "const_int_operand" "i"))
+ (use (label_ref:DI (match_operand 3 "" "X")))
+ (clobber (match_operand:DI 4 "register_operand" "=&r"))
+ (use (match_operand:DI 5 "register_operand" "1"))]
+ "TARGET_64BIT
+ && INTVAL (operands[6]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
+ && INTVAL (operands[6]) + INTVAL (operands[2]) * 16 >= -128"
+ "#"
+ [(set_attr "type" "other")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
+;; We know size of save instruction; expand the computation of jump address
+;; in the jumptable.
+(define_split
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(reg:DI XMM0_REG)
+ (reg:DI XMM1_REG)
+ (reg:DI XMM2_REG)
+ (reg:DI XMM3_REG)
+ (reg:DI XMM4_REG)
+ (reg:DI XMM5_REG)
+ (reg:DI XMM6_REG)
+ (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
+ (clobber (match_operand:DI 1 "register_operand" ""))
+ (use (match_operand:DI 2 "const_int_operand" ""))
+ (use (match_operand 3 "" ""))
+ (clobber (match_operand:DI 4 "register_operand" ""))
+ (use (match_operand:DI 5 "register_operand" ""))])]
+ "reload_completed"
+ [(parallel [(set (match_dup 0)
+ (unspec:BLK [(reg:DI XMM0_REG)
+ (reg:DI XMM1_REG)
+ (reg:DI XMM2_REG)
+ (reg:DI XMM3_REG)
+ (reg:DI XMM4_REG)
+ (reg:DI XMM5_REG)
+ (reg:DI XMM6_REG)
+ (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
+ (use (match_dup 1))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (use (match_dup 5))])]
+{
+ /* movaps is 4 bytes; vmovaps (AVX) and movsd are 5 bytes. */
+ int size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128);
+
+ /* Compute address to jump to:
+ label - eax*size + nnamed_sse_arguments*size. */
+ if (size == 5)
+ emit_insn (gen_rtx_SET (VOIDmode, operands[4],
+ gen_rtx_PLUS
+ (Pmode,
+ gen_rtx_MULT (Pmode, operands[1],
+ GEN_INT (4)),
+ operands[1])));
+ else if (size == 4)
+ emit_insn (gen_rtx_SET (VOIDmode, operands[4],
+ gen_rtx_MULT (Pmode, operands[1],
+ GEN_INT (4))));
+ else
+ gcc_unreachable ();
+ if (INTVAL (operands[2]))
+ emit_move_insn
+ (operands[1],
+ gen_rtx_CONST (DImode,
+ gen_rtx_PLUS (DImode,
+ operands[3],
+ GEN_INT (INTVAL (operands[2])
+ * size))));
+ else
+ emit_move_insn (operands[1], operands[3]);
+ emit_insn (gen_subdi3 (operands[1], operands[1], operands[4]));
+ operands[5] = GEN_INT (size);
+})
+
+(define_insn "sse_prologue_save_insn"
+ [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
+ (match_operand:DI 4 "const_int_operand" "n")))
+ (unspec:BLK [(reg:DI XMM0_REG)
+ (reg:DI XMM1_REG)
+ (reg:DI XMM2_REG)
+ (reg:DI XMM3_REG)
+ (reg:DI XMM4_REG)
+ (reg:DI XMM5_REG)
+ (reg:DI XMM6_REG)
+ (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
(use (match_operand:DI 1 "register_operand" "r"))
(use (match_operand:DI 2 "const_int_operand" "i"))
- (use (label_ref:DI (match_operand 3 "" "X")))]
+ (use (label_ref:DI (match_operand 3 "" "X")))
+ (use (match_operand:DI 5 "const_int_operand" "i"))]
"TARGET_64BIT
&& INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
&& INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
PUT_MODE (operands[4], TImode);
if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
output_asm_insn ("rex", operands);
- output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
+ if (crtl->stack_alignment_needed < 128)
+ output_asm_insn ("%vmovsd\t{%5, %4|%4, %5}", operands);
+ else
+ output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
}
(*targetm.asm_out.internal_label) (asm_out_file, "L",
CODE_LABEL_NUMBER (operands[3]));
[(set_attr "type" "other")
(set_attr "length_immediate" "0")
(set_attr "length_address" "0")
+ ;; 2 bytes for the jump and operands[5] bytes for each save.
(set (attr "length")
- (if_then_else
- (eq (symbol_ref "TARGET_AVX") (const_int 0))
- (const_string "34")
- (const_string "42")))
+ (plus (const_int 2)
+ (mult (symbol_ref ("INTVAL (operands[5])"))
+ (symbol_ref ("X86_64_SSE_REGPARM_MAX - INTVAL (operands[2])")))))
(set_attr "memory" "store")
(set_attr "modrm" "0")
(set_attr "prefix" "maybe_vex")