/* Ampere Computing ('\xC0') cores. */
AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (F16, RNG, AES, SHA3), ampere1, 0xC0, 0xac3, -1)
+AARCH64_CORE("ampere1a", ampere1a, cortexa57, V8_6A, (F16, RNG, AES, SHA3, MEMTAG), ampere1a, 0xC0, 0xac4, -1)
/* Do not swap around "emag" and "xgene1",
this order is required to handle variant correctly. */
AARCH64_CORE("emag", emag, xgene1, V8A, (CRC, CRYPTO), emag, 0x50, 0x000, 3)
}
};
+const struct cpu_cost_table ampere1a_extra_costs =
+{
+ /* ALU */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ 0, /* shift. */
+ COSTS_N_INSNS (1), /* shift_reg. */
+ 0, /* arith_shift. */
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
+ 0, /* log_shift. */
+ COSTS_N_INSNS (1), /* log_shift_reg. */
+ 0, /* extend. */
+ COSTS_N_INSNS (1), /* extend_arith. */
+ 0, /* bfi. */
+ 0, /* bfx. */
+ 0, /* clz. */
+ 0, /* rev. */
+ 0, /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+ {
+ /* MULT SImode */
+ {
+ COSTS_N_INSNS (3), /* simple. */
+ COSTS_N_INSNS (3), /* flag_setting. */
+ COSTS_N_INSNS (3), /* extend. */
+ COSTS_N_INSNS (4), /* add. */
+ COSTS_N_INSNS (4), /* extend_add. */
+ COSTS_N_INSNS (19) /* idiv. */
+ },
+ /* MULT DImode */
+ {
+ COSTS_N_INSNS (3), /* simple. */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (3), /* extend. */
+ COSTS_N_INSNS (4), /* add. */
+ COSTS_N_INSNS (4), /* extend_add. */
+ COSTS_N_INSNS (35) /* idiv. */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (4), /* load. */
+ COSTS_N_INSNS (4), /* load_sign_extend. */
+ 0, /* ldrd (n/a). */
+ 0, /* ldm_1st. */
+ 0, /* ldm_regs_per_insn_1st. */
+ 0, /* ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (5), /* loadf. */
+ COSTS_N_INSNS (5), /* loadd. */
+ COSTS_N_INSNS (5), /* load_unaligned. */
+ 0, /* store. */
+ 0, /* strd. */
+ 0, /* stm_1st. */
+ 0, /* stm_regs_per_insn_1st. */
+ 0, /* stm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (2), /* storef. */
+ COSTS_N_INSNS (2), /* stored. */
+ COSTS_N_INSNS (2), /* store_unaligned. */
+ COSTS_N_INSNS (3), /* loadv. */
+ COSTS_N_INSNS (3) /* storev. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (25), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (4), /* mult_addsub. */
+ COSTS_N_INSNS (4), /* fma. */
+ COSTS_N_INSNS (4), /* addsub. */
+ COSTS_N_INSNS (2), /* fpconst. */
+ COSTS_N_INSNS (4), /* neg. */
+ COSTS_N_INSNS (4), /* compare. */
+ COSTS_N_INSNS (4), /* widen. */
+ COSTS_N_INSNS (4), /* narrow. */
+ COSTS_N_INSNS (4), /* toint. */
+ COSTS_N_INSNS (4), /* fromint. */
+ COSTS_N_INSNS (4) /* roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (34), /* div. */
+ COSTS_N_INSNS (5), /* mult. */
+ COSTS_N_INSNS (5), /* mult_addsub. */
+ COSTS_N_INSNS (5), /* fma. */
+ COSTS_N_INSNS (5), /* addsub. */
+ COSTS_N_INSNS (2), /* fpconst. */
+ COSTS_N_INSNS (5), /* neg. */
+ COSTS_N_INSNS (5), /* compare. */
+ COSTS_N_INSNS (5), /* widen. */
+ COSTS_N_INSNS (5), /* narrow. */
+ COSTS_N_INSNS (6), /* toint. */
+ COSTS_N_INSNS (6), /* fromint. */
+ COSTS_N_INSNS (5) /* roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (3), /* alu. */
+ COSTS_N_INSNS (3), /* mult. */
+ COSTS_N_INSNS (2), /* movi. */
+ COSTS_N_INSNS (2), /* dup. */
+ COSTS_N_INSNS (2) /* extract. */
+ }
+};
+
#endif
AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC)
AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH)
AARCH64_FUSION_PAIR ("alu+cbz", ALU_CBZ)
+AARCH64_FUSION_PAIR ("addsub_2reg_const1", ADDSUB_2REG_CONST1)
#undef AARCH64_FUSION_PAIR
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexa715,cortexx2,neoversen2,demeter,neoversev2"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexa715,cortexx2,neoversen2,demeter,neoversev2"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
&ere1_prefetch_tune
};
+static const struct tune_params ampere1a_tunings =
+{
+ &ere1a_extra_costs,
+ &generic_addrcost_table,
+ &generic_regmove_cost,
+ &ere1_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
+ 4, /* issue_rate */
+ (AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_AES_AESMC |
+ AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_MOVK_MOVK |
+ AARCH64_FUSE_ALU_BRANCH /* adds, ands, bics, ccmp, ccmn */ |
+ AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_ALU_CBZ |
+ AARCH64_FUSE_ADDSUB_2REG_CONST1),
+ /* fusible_ops */
+ "32", /* function_align. */
+ "4", /* jump_align. */
+ "32:16", /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 2, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &ere1_prefetch_tune
+};
+
static const advsimd_vec_cost neoversev1_advsimd_vector_cost =
{
2, /* int_stmt_cost */
}
}
+ /* Fuse A+B+1 and A-B-1 */
+ if (simple_sets_p
+ && aarch64_fusion_enabled_p (AARCH64_FUSE_ADDSUB_2REG_CONST1))
+ {
+ /* We're trying to match:
+ prev == (set (r0) (plus (r0) (r1)))
+ curr == (set (r0) (plus (r0) (const_int 1)))
+ or:
+ prev == (set (r0) (minus (r0) (r1)))
+ curr == (set (r0) (plus (r0) (const_int -1))) */
+
+ rtx prev_src = SET_SRC (prev_set);
+ rtx curr_src = SET_SRC (curr_set);
+
+ int polarity = 1;
+ if (GET_CODE (prev_src) == MINUS)
+ polarity = -1;
+
+ if (GET_CODE (curr_src) == PLUS
+ && (GET_CODE (prev_src) == PLUS || GET_CODE (prev_src) == MINUS)
+ && CONST_INT_P (XEXP (curr_src, 1))
+ && INTVAL (XEXP (curr_src, 1)) == polarity
+ && REG_P (XEXP (curr_src, 0))
+ && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (curr_src, 0)))
+ return true;
+ }
+
return false;
}
@samp{cortex-a75.cortex-a55}, @samp{cortex-a76.cortex-a55},
@samp{cortex-r82}, @samp{cortex-x1}, @samp{cortex-x1c}, @samp{cortex-x2},
@samp{cortex-a510}, @samp{cortex-a710}, @samp{cortex-a715}, @samp{ampere1},
-@samp{native}.
+@samp{ampere1a}, and @samp{native}.
The values @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53},
@samp{cortex-a73.cortex-a35}, @samp{cortex-a73.cortex-a53},