enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
- enum insn_code fusion_gpr_ld; /* INSN for fusing gpr ADDIS/loads. */
- /* INSNs for fusing addi with loads
- or stores for each reg. class. */
- enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
- enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
- /* INSNs for fusing addis with loads
- or stores for each reg. class. */
- enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
- enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
- bool fused_toc; /* Mode supports TOC fusion. */
};
static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
+#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
+#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
+
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
-#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
-#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
-
#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
{
ssize_t rc;
int spaces = 0;
- bool fuse_extra_p;
fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
for (rc = 0; rc < N_RELOAD_REG; rc++)
if ((reg_addr[m].reload_store != CODE_FOR_nothing)
|| (reg_addr[m].reload_load != CODE_FOR_nothing))
- fprintf (stderr, " Reload=%c%c",
- (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
- (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
+ {
+ fprintf (stderr, "%*s Reload=%c%c", spaces, "",
+ (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
+ (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
+ spaces = 0;
+ }
else
spaces += sizeof (" Reload=sl") - 1;
else
spaces += sizeof (" Upper=y") - 1;
- fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
- || reg_addr[m].fused_toc);
- if (!fuse_extra_p)
- {
- for (rc = 0; rc < N_RELOAD_REG; rc++)
- {
- if (rc != RELOAD_REG_ANY)
- {
- if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
- || reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
- || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
- || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
- || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
- {
- fuse_extra_p = true;
- break;
- }
- }
- }
- }
-
- if (fuse_extra_p)
- {
- fprintf (stderr, "%*s Fuse:", spaces, "");
- spaces = 0;
-
- for (rc = 0; rc < N_RELOAD_REG; rc++)
- {
- if (rc != RELOAD_REG_ANY)
- {
- char load, store;
-
- if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
- load = 'l';
- else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
- load = 'L';
- else
- load = '-';
-
- if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
- store = 's';
- else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
- store = 'S';
- else
- store = '-';
-
- if (load == '-' && store == '-')
- spaces += 5;
- else
- {
- fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
- reload_reg_map[rc].name[0], load, store);
- spaces = 0;
- }
- }
- }
-
- if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
- {
- fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
- spaces = 0;
- }
- else
- spaces += sizeof (" P8gpr") - 1;
-
- if (reg_addr[m].fused_toc)
- {
- fprintf (stderr, "%*sToc", (spaces + 1), "");
- spaces = 0;
- }
- else
- spaces += sizeof (" Toc") - 1;
- }
- else
- spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
-
if (rs6000_vector_unit[m] != VECTOR_NONE
|| rs6000_vector_mem[m] != VECTOR_NONE)
{
char options[80];
strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
- if (TARGET_TOC_FUSION)
- strcat (options, ", toc");
-
if (TARGET_P8_FUSION_SIGN)
strcat (options, ", sign");
}
}
- /* Setup the fusion operations. */
- if (TARGET_P8_FUSION)
- {
- reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
- reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
- reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
- if (TARGET_64BIT)
- reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
- }
-
- if (TARGET_P9_FUSION)
- {
- struct fuse_insns {
- enum machine_mode mode; /* mode of the fused type. */
- enum machine_mode pmode; /* pointer mode. */
- enum rs6000_reload_reg_type rtype; /* register type. */
- enum insn_code load; /* load insn. */
- enum insn_code store; /* store insn. */
- };
-
- static const struct fuse_insns addis_insns[] = {
- { E_SFmode, E_DImode, RELOAD_REG_FPR,
- CODE_FOR_fusion_vsx_di_sf_load,
- CODE_FOR_fusion_vsx_di_sf_store },
-
- { E_SFmode, E_SImode, RELOAD_REG_FPR,
- CODE_FOR_fusion_vsx_si_sf_load,
- CODE_FOR_fusion_vsx_si_sf_store },
-
- { E_DFmode, E_DImode, RELOAD_REG_FPR,
- CODE_FOR_fusion_vsx_di_df_load,
- CODE_FOR_fusion_vsx_di_df_store },
-
- { E_DFmode, E_SImode, RELOAD_REG_FPR,
- CODE_FOR_fusion_vsx_si_df_load,
- CODE_FOR_fusion_vsx_si_df_store },
-
- { E_DImode, E_DImode, RELOAD_REG_FPR,
- CODE_FOR_fusion_vsx_di_di_load,
- CODE_FOR_fusion_vsx_di_di_store },
-
- { E_DImode, E_SImode, RELOAD_REG_FPR,
- CODE_FOR_fusion_vsx_si_di_load,
- CODE_FOR_fusion_vsx_si_di_store },
-
- { E_QImode, E_DImode, RELOAD_REG_GPR,
- CODE_FOR_fusion_gpr_di_qi_load,
- CODE_FOR_fusion_gpr_di_qi_store },
-
- { E_QImode, E_SImode, RELOAD_REG_GPR,
- CODE_FOR_fusion_gpr_si_qi_load,
- CODE_FOR_fusion_gpr_si_qi_store },
-
- { E_HImode, E_DImode, RELOAD_REG_GPR,
- CODE_FOR_fusion_gpr_di_hi_load,
- CODE_FOR_fusion_gpr_di_hi_store },
-
- { E_HImode, E_SImode, RELOAD_REG_GPR,
- CODE_FOR_fusion_gpr_si_hi_load,
- CODE_FOR_fusion_gpr_si_hi_store },
-
- { E_SImode, E_DImode, RELOAD_REG_GPR,
- CODE_FOR_fusion_gpr_di_si_load,
- CODE_FOR_fusion_gpr_di_si_store },
-
- { E_SImode, E_SImode, RELOAD_REG_GPR,
- CODE_FOR_fusion_gpr_si_si_load,
- CODE_FOR_fusion_gpr_si_si_store },
-
- { E_SFmode, E_DImode, RELOAD_REG_GPR,
- CODE_FOR_fusion_gpr_di_sf_load,
- CODE_FOR_fusion_gpr_di_sf_store },
-
- { E_SFmode, E_SImode, RELOAD_REG_GPR,
- CODE_FOR_fusion_gpr_si_sf_load,
- CODE_FOR_fusion_gpr_si_sf_store },
-
- { E_DImode, E_DImode, RELOAD_REG_GPR,
- CODE_FOR_fusion_gpr_di_di_load,
- CODE_FOR_fusion_gpr_di_di_store },
-
- { E_DFmode, E_DImode, RELOAD_REG_GPR,
- CODE_FOR_fusion_gpr_di_df_load,
- CODE_FOR_fusion_gpr_di_df_store },
- };
-
- machine_mode cur_pmode = Pmode;
- size_t i;
-
- for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
- {
- machine_mode xmode = addis_insns[i].mode;
- enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
-
- if (addis_insns[i].pmode != cur_pmode)
- continue;
-
- if (rtype == RELOAD_REG_FPR && !TARGET_HARD_FLOAT)
- continue;
-
- reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
- reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
-
- if (rtype == RELOAD_REG_FPR && TARGET_P9_VECTOR)
- {
- reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
- = addis_insns[i].load;
- reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
- = addis_insns[i].store;
- }
- }
- }
-
- /* Note which types we support fusing TOC setup plus memory insn. We only do
- fused TOCs for medium/large code models. */
- if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
- && (TARGET_CMODEL != CMODEL_SMALL))
- {
- reg_addr[QImode].fused_toc = true;
- reg_addr[HImode].fused_toc = true;
- reg_addr[SImode].fused_toc = true;
- reg_addr[DImode].fused_toc = true;
- if (TARGET_HARD_FLOAT)
- {
- reg_addr[SFmode].fused_toc = true;
- reg_addr[DFmode].fused_toc = true;
- }
- }
-
/* Precalculate HARD_REGNO_NREGS. */
for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
for (m = 0; m < NUM_MACHINE_MODES; ++m)
& OPTION_MASK_P8_FUSION);
/* Setting additional fusion flags turns on base fusion. */
- if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
+ if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
{
if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
{
error ("%qs requires %qs", "-mpower8-fusion-sign",
"-mpower8-fusion");
- if (TARGET_TOC_FUSION)
- error ("%qs requires %qs", "-mtoc-fusion", "-mpower8-fusion");
-
rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
}
else
&& optimize >= 3)
rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
- /* TOC fusion requires 64-bit and medium/large code model. */
- if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
- {
- rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
- if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
- warning (0, N_("-mtoc-fusion requires 64-bit"));
- }
-
- if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
- {
- rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
- if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
- warning (0, N_("-mtoc-fusion requires medium/large code model"));
- }
-
- /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
- model. */
- if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
- && (TARGET_CMODEL != CMODEL_SMALL)
- && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
- rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
-
/* ISA 3.0 vector instructions include ISA 2.07. */
if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
{
if (rs6000_tune == PROCESSOR_TITAN
|| rs6000_tune == PROCESSOR_CELL)
{
- if (align_functions <= 0)
- align_functions = 8;
- if (align_jumps <= 0)
- align_jumps = 8;
- if (align_loops <= 0)
- align_loops = 8;
+ if (flag_align_functions && !str_align_functions)
+ str_align_functions = "8";
+ if (flag_align_jumps && !str_align_jumps)
+ str_align_jumps = "8";
+ if (flag_align_loops && !str_align_loops)
+ str_align_loops = "8";
}
if (rs6000_align_branch_targets)
{
- if (align_functions <= 0)
- align_functions = 16;
- if (align_jumps <= 0)
- align_jumps = 16;
- if (align_loops <= 0)
+ if (flag_align_functions && !str_align_functions)
+ str_align_functions = "16";
+ if (flag_align_jumps && !str_align_jumps)
+ str_align_jumps = "16";
+ if (flag_align_loops && !str_align_loops)
{
can_override_loop_align = 1;
- align_loops = 16;
+ str_align_loops = "16";
}
}
- if (align_jumps_max_skip <= 0)
- align_jumps_max_skip = 15;
- if (align_loops_max_skip <= 0)
- align_loops_max_skip = 15;
+
+ if (flag_align_jumps && !str_align_jumps)
+ str_align_jumps = "16";
+ if (flag_align_loops && !str_align_loops)
+ str_align_loops = "16";
}
/* Arrange to save and restore machine status around nested functions. */
}
/* Implement LOOP_ALIGN. */
-int
+align_flags
rs6000_loop_align (rtx label)
{
basic_block bb;
/* Don't override loop alignment if -falign-loops was specified. */
if (!can_override_loop_align)
- return align_loops_log;
+ return align_loops;
bb = BLOCK_FOR_INSN (label);
ninsns = num_loop_insns(bb->loop_father);
|| rs6000_tune == PROCESSOR_POWER6
|| rs6000_tune == PROCESSOR_POWER7
|| rs6000_tune == PROCESSOR_POWER8))
- return 5;
+ return align_flags (5);
else
- return align_loops_log;
-}
-
-/* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
-static int
-rs6000_loop_align_max_skip (rtx_insn *label)
-{
- return (1 << rs6000_loop_align (label)) - 1;
+ return align_loops;
}
/* Return true iff, data reference of TYPE can reach vector alignment (16)
struct loop *loop = data->loop_info;
basic_block *bbs = get_loop_body (loop);
int nbbs = loop->num_nodes;
+ loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
int vec_cost = data->cost[vect_body], not_vec_cost = 0;
int i, density_pct;
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple *stmt = gsi_stmt (gsi);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_IN_PATTERN_P (stmt_info))
size_t i;
for (i = 0; i < 4; i++)
- {
- elements[i] = XVECEXP (vals, 0, i);
- if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
- elements[i] = copy_to_mode_reg (SImode, elements[i]);
- }
+ elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
elements[2], elements[3]));
gcc_unreachable ();
}
-/* Helper function for rs6000_split_v4si_init to build up a DImode value from
- two SImode values. */
-
-static void
-rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
-{
- const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
-
- if (CONST_INT_P (si1) && CONST_INT_P (si2))
- {
- unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
- unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
-
- emit_move_insn (dest, GEN_INT (const1 | const2));
- return;
- }
-
- /* Put si1 into upper 32-bits of dest. */
- if (CONST_INT_P (si1))
- emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
- else
- {
- /* Generate RLDIC. */
- rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
- rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
- rtx mask_rtx = GEN_INT (mask_32bit << 32);
- rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
- gcc_assert (!reg_overlap_mentioned_p (dest, si1));
- emit_insn (gen_rtx_SET (dest, and_rtx));
- }
-
- /* Put si2 into the temporary. */
- gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
- if (CONST_INT_P (si2))
- emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
- else
- emit_insn (gen_zero_extendsidi2 (tmp, si2));
-
- /* Combine the two parts. */
- emit_insn (gen_iordi3 (dest, dest, tmp));
- return;
-}
-
-/* Split a V4SI initialization. */
-
-void
-rs6000_split_v4si_init (rtx operands[])
-{
- rtx dest = operands[0];
-
- /* Destination is a GPR, build up the two DImode parts in place. */
- if (REG_P (dest) || SUBREG_P (dest))
- {
- int d_regno = regno_or_subregno (dest);
- rtx scalar1 = operands[1];
- rtx scalar2 = operands[2];
- rtx scalar3 = operands[3];
- rtx scalar4 = operands[4];
- rtx tmp1 = operands[5];
- rtx tmp2 = operands[6];
-
- /* Even though we only need one temporary (plus the destination, which
- has an early clobber constraint, try to use two temporaries, one for
- each double word created. That way the 2nd insn scheduling pass can
- rearrange things so the two parts are done in parallel. */
- if (BYTES_BIG_ENDIAN)
- {
- rtx di_lo = gen_rtx_REG (DImode, d_regno);
- rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
- rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
- rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
- }
- else
- {
- rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
- rtx di_hi = gen_rtx_REG (DImode, d_regno);
- rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
- rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
- }
- return;
- }
-
- else
- gcc_unreachable ();
-}
-
/* Return alignment of TYPE. Existing alignment is ALIGN. HOW
selects whether the alignment is abi mandated, optional, or
both abi and optional alignment. */
if (legitimate_constant_pool_address_p (x, mode,
reg_ok_strict || lra_in_progress))
return 1;
- if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
- && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
- return 1;
}
/* For TImode, if we have TImode in VSX registers, only allow register
case ALTIVEC_BUILTIN_STVX_V4SF:
case ALTIVEC_BUILTIN_STVX_V2DI:
case ALTIVEC_BUILTIN_STVX_V2DF:
+ case VSX_BUILTIN_STXVW4X_V16QI:
+ case VSX_BUILTIN_STXVW4X_V8HI:
+ case VSX_BUILTIN_STXVW4X_V4SF:
+ case VSX_BUILTIN_STXVW4X_V4SI:
+ case VSX_BUILTIN_STXVD2X_V2DF:
+ case VSX_BUILTIN_STXVD2X_V2DI:
return true;
default:
return false;
return true;
}
+    /* Unaligned vector loads.  */
+ case VSX_BUILTIN_LXVW4X_V16QI:
+ case VSX_BUILTIN_LXVW4X_V8HI:
+ case VSX_BUILTIN_LXVW4X_V4SF:
+ case VSX_BUILTIN_LXVW4X_V4SI:
+ case VSX_BUILTIN_LXVD2X_V2DF:
+ case VSX_BUILTIN_LXVD2X_V2DI:
+ {
+	arg0 = gimple_call_arg (stmt, 0);  /* Offset.  */
+	arg1 = gimple_call_arg (stmt, 1);  /* Address.  */
+ lhs = gimple_call_lhs (stmt);
+ location_t loc = gimple_location (stmt);
+ /* Since arg1 may be cast to a different type, just use ptr_type_node
+ here instead of trying to enforce TBAA on pointer types. */
+ tree arg1_type = ptr_type_node;
+ tree lhs_type = TREE_TYPE (lhs);
+ /* In GIMPLE the type of the MEM_REF specifies the alignment. The
+ required alignment (power) is 4 bytes regardless of data type. */
+ tree align_ltype = build_aligned_type (lhs_type, 4);
+ /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
+ the tree using the value from arg0. The resulting type will match
+ the type of arg1. */
+ gimple_seq stmts = NULL;
+ tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
+ tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+ arg1_type, arg1, temp_offset);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
+ take an offset, but since we've already incorporated the offset
+ above, here we just pass in a zero. */
+ gimple *g;
+ g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr,
+ build_int_cst (arg1_type, 0)));
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+
+    /* Unaligned vector stores.  */
+ case VSX_BUILTIN_STXVW4X_V16QI:
+ case VSX_BUILTIN_STXVW4X_V8HI:
+ case VSX_BUILTIN_STXVW4X_V4SF:
+ case VSX_BUILTIN_STXVW4X_V4SI:
+ case VSX_BUILTIN_STXVD2X_V2DF:
+ case VSX_BUILTIN_STXVD2X_V2DI:
+ {
+ arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
+ arg1 = gimple_call_arg (stmt, 1); /* Offset. */
+ tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
+ location_t loc = gimple_location (stmt);
+ tree arg0_type = TREE_TYPE (arg0);
+ /* Use ptr_type_node (no TBAA) for the arg2_type. */
+ tree arg2_type = ptr_type_node;
+ /* In GIMPLE the type of the MEM_REF specifies the alignment. The
+ required alignment (power) is 4 bytes regardless of data type. */
+ tree align_stype = build_aligned_type (arg0_type, 4);
+ /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
+ the tree using the value from arg1. */
+ gimple_seq stmts = NULL;
+ tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
+ tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+ arg2_type, arg2, temp_offset);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ gimple *g;
+ g = gimple_build_assign (build2 (MEM_REF, align_stype, temp_addr,
+ build_int_cst (arg2_type, 0)), arg0);
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+
/* Vector Fused multiply-add (fma). */
case ALTIVEC_BUILTIN_VMADDFP:
case VSX_BUILTIN_XVMADDDP:
case VSX_BUILTIN_VEC_MERGEH_V2DI:
fold_mergehl_helper (gsi, stmt, 0);
return true;
+
+ /* d = vec_pack (a, b) */
+ case P8V_BUILTIN_VPKUDUM:
+ case ALTIVEC_BUILTIN_VPKUHUM:
+ case ALTIVEC_BUILTIN_VPKUWUM:
+ {
+ arg0 = gimple_call_arg (stmt, 0);
+ arg1 = gimple_call_arg (stmt, 1);
+ lhs = gimple_call_lhs (stmt);
+ gimple *g = gimple_build_assign (lhs, VEC_PACK_TRUNC_EXPR, arg0, arg1);
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+
+ /* d = vec_unpackh (a) */
+ /* Note that the UNPACK_{HI,LO}_EXPR used in the gimple_build_assign call
+ in this code is sensitive to endian-ness, and needs to be inverted to
+ handle both LE and BE targets. */
+ case ALTIVEC_BUILTIN_VUPKHSB:
+ case ALTIVEC_BUILTIN_VUPKHSH:
+ case P8V_BUILTIN_VUPKHSW:
+ {
+ arg0 = gimple_call_arg (stmt, 0);
+ lhs = gimple_call_lhs (stmt);
+ if (BYTES_BIG_ENDIAN)
+ g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
+ else
+ g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+ /* d = vec_unpackl (a) */
+ case ALTIVEC_BUILTIN_VUPKLSB:
+ case ALTIVEC_BUILTIN_VUPKLSH:
+ case P8V_BUILTIN_VUPKLSW:
+ {
+ arg0 = gimple_call_arg (stmt, 0);
+ lhs = gimple_call_lhs (stmt);
+ if (BYTES_BIG_ENDIAN)
+ g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0);
+ else
+ g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0);
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+ /* There is no gimple type corresponding with pixel, so just return. */
+ case ALTIVEC_BUILTIN_VUPKHPX:
+ case ALTIVEC_BUILTIN_VUPKLPX:
+ return false;
+
default:
if (TARGET_DEBUG_BUILTIN)
fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
case MISC_BUILTIN_SPEC_BARRIER:
{
- emit_insn (gen_rs6000_speculation_barrier ());
+ emit_insn (gen_speculation_barrier ());
return NULL_RTX;
}
names. */
if (mode == IFmode)
{
- set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
- set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
- set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
- set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
- set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
- set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
+ set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
+ set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
+ set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
+ set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
+ set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
+ set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
if (TARGET_POWERPC64)
{
if (mode != TFmode && FLOAT128_IBM_P (TFmode))
set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
- set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
- set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
- set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
- set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
- set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
- set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
+ set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
+ set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
+ set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
+ set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
+ set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
+ set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
return TARGET_32BIT ? SImode : word_mode;
}
+/* Target hook for translate_mode_attribute.  If MODE is one of the
+   128-bit float modes (IEEE or IBM double-double) and the corresponding
+   float type node is the same type as "long double", translate MODE to
+   the long double mode (TCmode for complex modes, TFmode otherwise).
+   All other modes are returned unchanged.  */
+static machine_mode
+rs6000_translate_mode_attribute (machine_mode mode)
+{
+  if ((FLOAT128_IEEE_P (mode)
+       && ieee128_float_type_node == long_double_type_node)
+      || (FLOAT128_IBM_P (mode)
+	  && ibm128_float_type_node == long_double_type_node))
+    return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
+  return mode;
+}
+
/* Target hook for scalar_mode_supported_p. */
static bool
rs6000_scalar_mode_supported_p (scalar_mode mode)
{ "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
{ "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
{ "string", 0, false, true },
- { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
{ "update", OPTION_MASK_NO_UPDATE, true , true },
{ "vsx", OPTION_MASK_VSX, false, true },
#ifdef OPTION_MASK_64BIT
return;
}
-/* Wrap a TOC address that can be fused to indicate that special fusion
- processing is needed. */
-
-rtx
-fusion_wrap_memory_address (rtx old_mem)
-{
- rtx old_addr = XEXP (old_mem, 0);
- rtvec v = gen_rtvec (1, old_addr);
- rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
- return replace_equiv_address_nv (old_mem, new_addr, false);
-}
-
/* Given an address, convert it into the addis and load offset parts. Addresses
created during the peephole2 process look like:
(lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
- (unspec [(...)] UNSPEC_TOCREL))
-
- Addresses created via toc fusion look like:
- (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
+ (unspec [(...)] UNSPEC_TOCREL)) */
static void
fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
{
rtx hi, lo;
- if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
- {
- lo = XVECEXP (addr, 0, 0);
- hi = gen_rtx_HIGH (Pmode, lo);
- }
- else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
+ if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
{
hi = XEXP (addr, 0);
lo = XEXP (addr, 1);
is the logical address that was formed during peephole2:
(lo_sum (high) (low-part))
- Or the address is the TOC address that is wrapped before register allocation:
- (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
-
The code is complicated, so we call output_asm_insn directly, and just
return "". */