TM_MULTILIB_CONFIG=`echo $TM_MULTILIB_CONFIG | sed 's/^,//'`
need_64bit_isa=yes
case X"${with_cpu}" in
- Xgeneric|Xatom|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3)
+ Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3)
;;
X)
if test x$with_cpu_64 = x; then
;;
*)
echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
- echo "generic atom core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2
+ echo "generic atom slm core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2
exit 1
;;
esac
tmake_file="$tmake_file i386/t-sol2-64"
need_64bit_isa=yes
case X"${with_cpu}" in
- Xgeneric|Xatom|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3)
+ Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3)
;;
X)
if test x$with_cpu_64 = x; then
;;
*)
echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
- echo "generic atom core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2
+ echo "generic atom slm core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2
exit 1
;;
esac
if test x$enable_targets = xall; then
tm_defines="${tm_defines} TARGET_BI_ARCH=1"
case X"${with_cpu}" in
- Xgeneric|Xatom|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3)
+ Xgeneric|Xatom|Xslm|Xcore2|Xcorei7|Xcorei7-avx|Xnocona|Xx86-64|Xbdver3|Xbdver2|Xbdver1|Xbtver2|Xbtver1|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx|Xathlon64-sse3|Xk8-sse3|Xopteron-sse3)
;;
X)
if test x$with_cpu_64 = x; then
;;
*)
echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
- echo "generic atom core2 corei7 Xcorei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2
+ # List the bare names accepted by --with-cpu; the X prefix is only used in the case patterns.
+ echo "generic atom slm core2 corei7 corei7-avx nocona x86-64 bdver3 bdver2 bdver1 btver2 btver1 amdfam10 barcelona k8 opteron athlon64 athlon-fx athlon64-sse3 k8-sse3 opteron-sse3" 1>&2
exit 1
;;
esac
arch=atom
cpu=atom
;;
+ slm-*)
+ arch=slm
+ cpu=slm
+ ;;
core2-*)
arch=core2
cpu=core2
arch=atom
cpu=atom
;;
+ slm-*)
+ arch=slm
+ cpu=slm
+ ;;
core2-*)
arch=core2
cpu=core2
| k8 | k8-sse3 | athlon64 | athlon64-sse3 | opteron \
| opteron-sse3 | athlon-fx | bdver3 | bdver2 | bdver1 | btver2 \
| btver1 | amdfam10 | barcelona | nocona | core2 | corei7 \
- | corei7-avx | core-avx-i | core-avx2 | atom)
+ | corei7-avx | core-avx-i | core-avx2 | atom | slm)
# OK
;;
*)
/* Assume Sandy Bridge. */
cpu = "corei7-avx";
else if (has_sse4_2)
- /* Assume Core i7. */
- cpu = "corei7";
+ {
+ if (has_movbe)
+ /* Assume SLM. */
+ cpu = "slm";
+ else
+ /* Assume Core i7. */
+ cpu = "corei7";
+ }
else if (has_ssse3)
{
if (has_movbe)
def_or_undef (parse_in, "__atom");
def_or_undef (parse_in, "__atom__");
break;
+ case PROCESSOR_SLM:
+ def_or_undef (parse_in, "__slm");
+ def_or_undef (parse_in, "__slm__");
+ break;
/* use PROCESSOR_max to not set/unset the arch macro. */
case PROCESSOR_max:
break;
case PROCESSOR_ATOM:
def_or_undef (parse_in, "__tune_atom__");
break;
+ case PROCESSOR_SLM:
+ def_or_undef (parse_in, "__tune_slm__");
+ break;
case PROCESSOR_GENERIC32:
case PROCESSOR_GENERIC64:
break;
1, /* cond_not_taken_branch_cost. */
};
+static const
+struct processor_costs slm_cost = { /* Instruction cost model for the slm (Silvermont) processor; each entry is annotated below. */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, /* NOTE(review): this pair of size/strategy tables is presumably the block-copy expansion strategy -- confirm against struct processor_costs. */
+ {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
+ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
+ {{libcall, {{8, loop, false}, {15, unrolled_loop, false}, /* NOTE(review): second pair, presumably the block-set expansion strategy -- confirm. */
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{24, loop, false}, {32, unrolled_loop, false},
+ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
/* Generic64 should produce code tuned for Nocona and K8. */
static const
struct processor_costs generic64_cost = {
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL)
#define m_ATOM (1<<PROCESSOR_ATOM)
+#define m_SLM (1<<PROCESSOR_SLM)
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
m_486 | m_PENT,
/* X86_TUNE_UNROLL_STRLEN */
- m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE_ALL | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
+ m_486 | m_PENT | m_PPRO | m_ATOM | m_SLM | m_CORE_ALL | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
on simulation result. But after P4 was made, no performance benefit
~m_386,
/* X86_TUNE_USE_SAHF */
- m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC,
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
partial dependencies. */
- m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
register stalls on Generic32 compilation setting as well. However
m_386 | m_486 | m_K6_GEODE,
/* X86_TUNE_USE_SIMODE_FIOP */
- ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
+ ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC),
/* X86_TUNE_USE_MOV0 */
m_K6,
/* X86_TUNE_USE_CLTD */
- ~(m_PENT | m_ATOM | m_K6),
+ ~(m_PENT | m_ATOM | m_SLM | m_K6),
/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
m_PENT4,
~(m_PENT | m_PPRO),
/* X86_TUNE_PROMOTE_QIMODE */
- m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
+ m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_FAST_PREFIX */
~(m_386 | m_486 | m_PENT),
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
for DFmode copies */
- ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
+ ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
/* X86_TUNE_PARTIAL_REG_DEPENDENCY */
- m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
+ m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
conflict here in between PPro/Pentium4 based chips that thread 128bit
shows that disabling this option on P4 brings over 20% SPECfp regression,
while enabling it on K8 brings roughly 2.4% regression that can be partly
masked by careful scheduling of moves. */
- m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMDFAM10 | m_BDVER | m_GENERIC,
/* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
- m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER,
+ m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM,
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
- m_COREI7 | m_BDVER,
+ m_COREI7 | m_BDVER | m_SLM,
/* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
m_BDVER ,
m_PPRO | m_P4_NOCONA,
/* X86_TUNE_MEMORY_MISMATCH_STALL */
- m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
+ m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_PROLOGUE_USING_MOVE */
m_PPRO | m_ATHLON_K8,
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
than 4 branch instructions in the 16 byte window. */
- m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_SCHEDULE */
- m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
+ m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_USE_BT */
- m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
+ m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_USE_INCDEC */
- ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_GENERIC),
+ ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GENERIC),
/* X86_TUNE_PAD_RETURNS */
m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC,
m_ATOM,
/* X86_TUNE_EXT_80387_CONSTANTS */
- m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
/* X86_TUNE_AVOID_VECTOR_DECODE */
m_CORE_ALL | m_K8 | m_GENERIC64,
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */
- m_ATOM,
+ m_ATOM | m_SLM,
/* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
instructions. */
/* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
during reassociation of fp computation. */
- m_ATOM | m_HASWELL,
+ m_ATOM | m_SLM | m_HASWELL,
/* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
regs instead of memory. */
/* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
a conditional move. */
- m_ATOM
+ m_ATOM,
+
+ /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
+ fp converts to destination register. */
+ m_SLM
+
};
/* Feature tests against the various architecture variations. */
};
static const unsigned int x86_accumulate_outgoing_args
- = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC;
+ = m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC;
static const unsigned int x86_arch_always_fancy_math_387
- = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
+ = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC;
static const unsigned int x86_avx256_split_unaligned_load
= m_COREI7 | m_GENERIC;
{&bdver3_cost, 32, 24, 32, 7, 32},
{&btver1_cost, 32, 24, 32, 7, 32},
{&btver2_cost, 32, 24, 32, 7, 32},
- {&atom_cost, 16, 15, 16, 7, 16}
+ {&atom_cost, 16, 15, 16, 7, 16},
+ {&slm_cost, 16, 15, 16, 7, 16}
};
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
"corei7",
"core-avx2",
"atom",
+ "slm",
"geode",
"k6",
"k6-2",
{"atom", PROCESSOR_ATOM, CPU_ATOM,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE | PTA_FXSR},
+ {"slm", PROCESSOR_SLM, CPU_SLM,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_MOVBE
+ | PTA_FXSR},
{"geode", PROCESSOR_GEODE, CPU_GEODE,
PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
{"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
- unsigned int regno2, int split_cost)
+ unsigned int regno2, int split_cost, bool has_scale)
{
int dist_define, dist_use;
+ /* For Silvermont if using a 2-source or 3-source LEA for
+ non-destructive destination purposes, or due to wanting
+ ability to use SCALE, the use of LEA is justified. */
+ if (ix86_tune == PROCESSOR_SLM)
+ {
+ if (has_scale)
+ return true;
+ if (split_cost < 1)
+ return false;
+ if (regno0 == regno1 || regno0 == regno2)
+ return false;
+ return true;
+ }
+
dist_define = distance_non_agu_define (regno1, regno2, insn);
dist_use = distance_agu_use (regno0, insn);
if (regno0 == regno1 || regno0 == regno2)
return false;
else
- return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
+ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
}
/* Return true if we should emit lea instruction instead of mov
regno0 = true_regnum (operands[0]);
regno1 = true_regnum (operands[1]);
- return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
+ return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
}
/* Return true if we need to split lea into a sequence of
split_cost -= 1;
}
- return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
+ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
+ parts.scale > 1);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
- return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
+ return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
}
/* Return true if destination reg of SET_BODY is shift count of
{
case PROCESSOR_PENTIUM:
case PROCESSOR_ATOM:
+ case PROCESSOR_SLM:
case PROCESSOR_K6:
case PROCESSOR_BTVER2:
return 2;
return false;
}
+/* Helper function for exact_store_load_dependency.
+ Return true if addr is found in insn.
+
+ ADDR is the rtx being looked for and INSN is the expression searched.
+ INSN is walked recursively through its 'e' (sub-expression) and 'E'
+ (vector) operands. Only MEM nodes are compared against ADDR, using
+ rtx_equal_p; every other node is either a leaf or is descended into. */
+static bool
+exact_dependency_1 (rtx addr, rtx insn)
+{
+ enum rtx_code code;
+ const char *format_ptr;
+ int i, j;
+
+ code = GET_CODE (insn);
+ switch (code)
+ {
+ case MEM:
+ if (rtx_equal_p (addr, insn))
+ return true;
+ break; /* No match here: the MEM's own operands are still searched below. */
+ case REG:
+ CASE_CONST_ANY:
+ case SYMBOL_REF:
+ case CODE_LABEL:
+ case PC:
+ case CC0:
+ case EXPR_LIST:
+ return false; /* These codes are treated as leaves and never searched. */
+ default:
+ break;
+ }
+
+ format_ptr = GET_RTX_FORMAT (code);
+ for (i = 0; i < GET_RTX_LENGTH (code); i++)
+ {
+ switch (*format_ptr++) /* One format letter per operand; letters other than 'e' and 'E' are skipped. */
+ {
+ case 'e':
+ if (exact_dependency_1 (addr, XEXP (insn, i)))
+ return true;
+ break;
+ case 'E':
+ for (j = 0; j < XVECLEN (insn, i); j++)
+ if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
+ return true;
+ break;
+ }
+ }
+ return false;
+}
+
+/* Return true if there exists exact dependency for store & load, i.e.
+ the same memory address is used in them.
+
+ Returns false unless STORE has a single_set whose destination is a
+ MEM and LOAD has a single_set. When both hold, the result is whether
+ that destination MEM occurs anywhere in the source of LOAD's set, as
+ decided by exact_dependency_1. */
+static bool
+exact_store_load_dependency (rtx store, rtx load)
+{
+ rtx set1, set2;
+
+ set1 = single_set (store);
+ if (!set1)
+ return false;
+ if (!MEM_P (SET_DEST (set1)))
+ return false;
+ set2 = single_set (load);
+ if (!set2)
+ return false;
+ if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
+ return true;
+ return false;
+}
+
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
else
cost = 0;
}
+ break;
+
+ case PROCESSOR_SLM:
+ if (!reload_completed)
+ return cost;
+
+ /* Increase cost of integer loads. */
+ memory = get_attr_memory (dep_insn);
+ if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ {
+ enum attr_unit unit = get_attr_unit (dep_insn);
+ if (unit == UNIT_INTEGER && cost == 1)
+ {
+ if (memory == MEMORY_LOAD)
+ cost = 3;
+ else
+ {
+ /* Increase cost of ld/st for short int types only
+ because of store forwarding issue. */
+ rtx set = single_set (dep_insn);
+ if (set && (GET_MODE (SET_DEST (set)) == QImode
+ || GET_MODE (SET_DEST (set)) == HImode))
+ {
+ /* Increase cost of store/load insn if exact
+ dependence exists and it is load insn. */
+ enum attr_memory insn_memory = get_attr_memory (insn);
+ if (insn_memory == MEMORY_LOAD
+ && exact_store_load_dependency (dep_insn, insn))
+ cost = 3;
+ }
+ }
+ }
+ }
default:
break;
case PROCESSOR_COREI7:
case PROCESSOR_HASWELL:
case PROCESSOR_ATOM:
+ case PROCESSOR_SLM:
/* Generally, we want haifa-sched:max_issue() to look ahead as far
as many instructions can be executed on a cycle, i.e.,
issue_rate. I wonder why tuning for many CPUs does not do this. */
execution. It is applied if
(1) IMUL instruction is on the top of list;
(2) There exists the only producer of independent IMUL instruction in
- ready list;
- (3) Put found producer on the top of ready list.
- Returns issue rate. */
-
+ ready list.
+ Return index of IMUL producer if it was found and -1 otherwise. */
static int
-ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
- int clock_var ATTRIBUTE_UNUSED)
+do_reorder_for_imul (rtx *ready, int n_ready)
{
- static int issue_rate = -1;
- int n_ready = *pn_ready;
- rtx insn, insn1, insn2;
- int i;
+ rtx insn, set, insn1, insn2;
sd_iterator_def sd_it;
dep_t dep;
int index = -1;
+ int i;
- /* Set up issue rate. */
- issue_rate = ix86_issue_rate();
-
- /* Do reodering for Atom only. */
if (ix86_tune != PROCESSOR_ATOM)
- return issue_rate;
- /* Do not perform ready list reodering for pre-reload schedule pass. */
- if (!reload_completed)
- return issue_rate;
- /* Nothing to do if ready list contains only 1 instruction. */
- if (n_ready <= 1)
- return issue_rate;
+ return index;
/* Check that IMUL instruction is on the top of ready list. */
insn = ready[n_ready - 1];
- if (!NONDEBUG_INSN_P (insn))
- return issue_rate;
- insn = PATTERN (insn);
- if (GET_CODE (insn) == PARALLEL)
- insn = XVECEXP (insn, 0, 0);
- if (GET_CODE (insn) != SET)
- return issue_rate;
- if (!(GET_CODE (SET_SRC (insn)) == MULT
- && GET_MODE (SET_SRC (insn)) == SImode))
- return issue_rate;
+ set = single_set (insn);
+ if (!set)
+ return index;
+ if (!(GET_CODE (SET_SRC (set)) == MULT
+ && GET_MODE (SET_SRC (set)) == SImode))
+ return index;
/* Search for producer of independent IMUL instruction. */
- for (i = n_ready - 2; i>= 0; i--)
+ for (i = n_ready - 2; i >= 0; i--)
{
insn = ready[i];
if (!NONDEBUG_INSN_P (insn))
- continue;
+ continue;
/* Skip IMUL instruction. */
insn2 = PATTERN (insn);
if (GET_CODE (insn2) == PARALLEL)
- insn2 = XVECEXP (insn2, 0, 0);
+ insn2 = XVECEXP (insn2, 0, 0);
if (GET_CODE (insn2) == SET
- && GET_CODE (SET_SRC (insn2)) == MULT
- && GET_MODE (SET_SRC (insn2)) == SImode)
- continue;
+ && GET_CODE (SET_SRC (insn2)) == MULT
+ && GET_MODE (SET_SRC (insn2)) == SImode)
+ continue;
FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
- {
- rtx con;
+ {
+ rtx con;
con = DEP_CON (dep);
if (!NONDEBUG_INSN_P (con))
continue;
- insn1 = PATTERN (con);
- if (GET_CODE (insn1) == PARALLEL)
- insn1 = XVECEXP (insn1, 0, 0);
+ insn1 = PATTERN (con);
+ if (GET_CODE (insn1) == PARALLEL)
+ insn1 = XVECEXP (insn1, 0, 0);
- if (GET_CODE (insn1) == SET
- && GET_CODE (SET_SRC (insn1)) == MULT
- && GET_MODE (SET_SRC (insn1)) == SImode)
- {
- sd_iterator_def sd_it1;
- dep_t dep1;
- /* Check if there is no other dependee for IMUL. */
- index = i;
- FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
- {
- rtx pro;
- pro = DEP_PRO (dep1);
+ if (GET_CODE (insn1) == SET
+ && GET_CODE (SET_SRC (insn1)) == MULT
+ && GET_MODE (SET_SRC (insn1)) == SImode)
+ {
+ sd_iterator_def sd_it1;
+ dep_t dep1;
+ /* Check if there is no other dependee for IMUL. */
+ index = i;
+ FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
+ {
+ rtx pro;
+ pro = DEP_PRO (dep1);
if (!NONDEBUG_INSN_P (pro))
continue;
- if (pro != insn)
- index = -1;
- }
- if (index >= 0)
- break;
- }
- }
+ if (pro != insn)
+ index = -1;
+ }
+ if (index >= 0)
+ break;
+ }
+ }
if (index >= 0)
- break;
+ break;
+ }
+ return index;
+}
+
+/* Try to find the best candidate on the top of ready list if two insns
+ have the same priority - candidate is best if its dependees were
+ scheduled earlier. Applied for Silvermont only.
+ Return true if top 2 insns must be interchanged.
+
+ READY[N_READY - 1] is the top of the list and READY[N_READY - 2] the
+ next entry; both are read unconditionally, so the caller must pass
+ N_READY >= 2. The answer is false when ix86_tune is not
+ PROCESSOR_SLM, when either insn is a debug insn, is not a
+ NONJUMP_INSN_P insn or has no single_set, or when the two priorities
+ are unknown or differ. Otherwise the largest INSN_TICK among each
+ insn's SD_LIST_RES_BACK producers is compared. */
+static bool
+swap_top_of_ready_list (rtx *ready, int n_ready)
+{
+ rtx top = ready[n_ready - 1];
+ rtx next = ready[n_ready - 2];
+ rtx set;
+ sd_iterator_def sd_it;
+ dep_t dep;
+ int clock1 = -1; /* Stays -1 if TOP has no non-debug producer in the list walked below. */
+ int clock2 = -1; /* Likewise for NEXT. */
+ #define INSN_TICK(INSN) (HID (INSN)->tick)
+
+ if (ix86_tune != PROCESSOR_SLM)
+ return false;
+
+ if (!NONDEBUG_INSN_P (top))
+ return false;
+ if (!NONJUMP_INSN_P (top))
+ return false;
+ if (!NONDEBUG_INSN_P (next))
+ return false;
+ if (!NONJUMP_INSN_P (next))
+ return false;
+ set = single_set (top);
+ if (!set)
+ return false;
+ set = single_set (next);
+ if (!set)
+ return false;
+
+ if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
+ {
+ if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
+ return false;
+ /* Determine the winner more precisely. */
+ FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
+ {
+ rtx pro;
+ pro = DEP_PRO (dep);
+ if (!NONDEBUG_INSN_P (pro))
+ continue;
+ if (INSN_TICK (pro) > clock1)
+ clock1 = INSN_TICK (pro);
+ }
+ FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
+ {
+ rtx pro;
+ pro = DEP_PRO (dep);
+ if (!NONDEBUG_INSN_P (pro))
+ continue;
+ if (INSN_TICK (pro) > clock2)
+ clock2 = INSN_TICK (pro);
+ }
+
+ if (clock1 == clock2)
+ {
+ /* Determine winner - load must win. */
+ enum attr_memory memory1, memory2;
+ memory1 = get_attr_memory (top);
+ memory2 = get_attr_memory (next);
+ if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
+ return true;
+ }
+ return (bool) (clock2 < clock1); /* Swap only if NEXT's latest producer tick is strictly smaller than TOP's. */
}
- if (index < 0)
- return issue_rate; /* Didn't find IMUL producer. */
+ return false;
+ #undef INSN_TICK
+}
+
+/* Perform possible reordering of ready list for Atom/Silvermont only.
+ Return issue rate.
+
+ READY is the ready list and *PN_READY its length; READY[n_ready - 1]
+ is the top. When SCHED_VERBOSE is greater than 1, a line describing
+ the reordering is written to DUMP. The swap of the two top entries
+ is attempted only when CLOCK_VAR is non-zero. The value returned is
+ always the result of ix86_issue_rate, whether or not the list was
+ changed. */
+static int
+ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
+ int clock_var)
+{
+ int issue_rate = -1;
+ int n_ready = *pn_ready;
+ int i;
+ rtx insn;
+ int index = -1;
- if (sched_verbose > 1)
- fprintf(dump, ";;\tatom sched_reorder: swap %d and %d insns\n",
- INSN_UID (ready[index]), INSN_UID (ready[n_ready - 1]));
+ /* Set up issue rate. */
+ issue_rate = ix86_issue_rate ();
+
+ /* Do reordering for Atom/SLM only. */
+ if (ix86_tune != PROCESSOR_ATOM && ix86_tune != PROCESSOR_SLM)
+ return issue_rate;
+
+ /* Nothing to do if ready list contains only 1 instruction. */
+ if (n_ready <= 1)
+ return issue_rate;
- /* Put IMUL producer (ready[index]) at the top of ready list. */
- insn1= ready[index];
- for (i = index; i < n_ready - 1; i++)
- ready[i] = ready[i + 1];
- ready[n_ready - 1] = insn1;
+ /* Do reordering for post-reload scheduler only. */
+ if (!reload_completed)
+ return issue_rate;
+
+ if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
+ {
+ if (sched_verbose > 1)
+ fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
+ INSN_UID (ready[index]));
+ /* Put IMUL producer (ready[index]) at the top of ready list. */
+ insn = ready[index];
+ for (i = index; i < n_ready - 1; i++)
+ ready[i] = ready[i + 1];
+ ready[n_ready - 1] = insn;
+ return issue_rate; /* The IMUL reordering takes precedence: the top-two swap below is not tried. */
+ }
+ if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
+ {
+ if (sched_verbose > 1)
+ fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
+ INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
+ /* Swap 2 top elements of ready list. */
+ insn = ready[n_ready - 1];
+ ready[n_ready - 1] = ready[n_ready - 2];
+ ready[n_ready - 2] = insn;
+ }
return issue_rate;
}
M_INTEL_COREI7,
M_AMDFAM10H,
M_AMDFAM15H,
+ M_INTEL_SLM,
M_CPU_SUBTYPE_START,
M_INTEL_COREI7_NEHALEM,
M_INTEL_COREI7_WESTMERE,
{"amd", M_AMD},
{"intel", M_INTEL},
{"atom", M_INTEL_ATOM},
+ {"slm", M_INTEL_SLM},
{"core2", M_INTEL_CORE2},
{"corei7", M_INTEL_COREI7},
{"nehalem", M_INTEL_COREI7_NEHALEM},
#define TARGET_BTVER1 (ix86_tune == PROCESSOR_BTVER1)
#define TARGET_BTVER2 (ix86_tune == PROCESSOR_BTVER2)
#define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM)
+#define TARGET_SLM (ix86_tune == PROCESSOR_SLM)
/* Feature tests against the various tunings. */
enum ix86_tune_indices {
X86_TUNE_REASSOC_FP_TO_PARALLEL,
X86_TUNE_GENERAL_REGS_SSE_SPILL,
X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE,
+ X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS,
X86_TUNE_LAST
};
ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
#define TARGET_AVOID_MEM_OPND_FOR_CMOVE \
ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE]
+#define TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS \
+ ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
TARGET_CPU_DEFAULT_corei7,
TARGET_CPU_DEFAULT_haswell,
TARGET_CPU_DEFAULT_atom,
+ TARGET_CPU_DEFAULT_slm,
TARGET_CPU_DEFAULT_geode,
TARGET_CPU_DEFAULT_k6,
PROCESSOR_BTVER1,
PROCESSOR_BTVER2,
PROCESSOR_ATOM,
+ PROCESSOR_SLM,
PROCESSOR_max
};
\f
;; Processor type.
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,corei7,
- atom,generic64,amdfam10,bdver1,bdver2,bdver3,btver1,btver2"
+ atom,slm,generic64,amdfam10,bdver1,bdver2,bdver3,btver1,btver2"
(const (symbol_ref "ix86_schedule")))
;; A basic instruction type. Refinements due to arguments to be
(include "btver2.md")
(include "geode.md")
(include "atom.md")
+(include "slm.md")
(include "core2.md")
\f
CONST0_RTX (V4SFmode), operands[1]));
})
+;; It's more profitable to split and then extend in the same register.
+(define_peephole2
+ [(set (match_operand:DF 0 "register_operand")
+ (float_extend:DF
+ (match_operand:SF 1 "memory_operand")))]
+ "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
+ && optimize_insn_for_speed_p ()
+ && SSE_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float_extend:DF (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));")
+
(define_insn "*extendsfdf2_mixed"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
(float_extend:DF
CONST0_RTX (V2DFmode), operands[1]));
})
+;; It's more profitable to split and then extend in the same register.
+(define_peephole2
+ [(set (match_operand:SF 0 "register_operand")
+ (float_truncate:SF
+ (match_operand:DF 1 "memory_operand")))]
+ "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
+ && optimize_insn_for_speed_p ()
+ && SSE_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));")
+
(define_expand "truncdfsf2_with_temp"
[(parallel [(set (match_operand:SF 0)
(float_truncate:SF (match_operand:DF 1)))
--- /dev/null
+;; Silvermont (SLM) Scheduling
+;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; Silvermont has 2 out-of-order IEC, 2 in-order FEC and 1 in-order MEC.
+
+
+(define_automaton "slm")
+
+;; EU: Execution Unit
+;; Silvermont EUs are connected by port 0 or port 1.
+
+;; SLM has two ports: port 0 and port 1 connecting to all execution units
+(define_cpu_unit "slm-port-0,slm-port-1" "slm")
+
+(define_cpu_unit "slm-ieu-0, slm-ieu-1,
+ slm-imul, slm-feu-0, slm-feu-1"
+ "slm")
+
+(define_reservation "slm-all-ieu" "(slm-ieu-0 + slm-ieu-1 + slm-imul)")
+(define_reservation "slm-all-feu" "(slm-feu-0 + slm-feu-1)")
+(define_reservation "slm-all-eu" "(slm-all-ieu + slm-all-feu)")
+(define_reservation "slm-fp-0" "(slm-port-0 + slm-feu-0)")
+
+;; Some EUs have duplicated copies and can be accessed via either
+;; port 0 or port 1
+;; (define_reservation "slm-port-either" "(slm-port-0 | slm-port-1)")
+(define_reservation "slm-port-dual" "(slm-port-0 + slm-port-1)")
+
+;;; fmul insn can have 4 or 5 cycles latency
+(define_reservation "slm-fmul-5c"
+ "(slm-port-0 + slm-feu-0), slm-feu-0, nothing*3")
+(define_reservation "slm-fmul-4c" "(slm-port-0 + slm-feu-0), nothing*3")
+
+;;; fadd can have 3 or 4 cycles latency depending on instruction form
+(define_reservation "slm-fadd-3c" "(slm-port-1 + slm-feu-1), nothing*2")
+(define_reservation "slm-fadd-4c"
+ "(slm-port-1 + slm-feu-1), slm-feu-1, nothing*2")
+
+;;; imul insn has 3 cycles latency for SI operands
+(define_reservation "slm-imul-32"
+ "(slm-port-1 + slm-imul), nothing*2")
+(define_reservation "slm-imul-mem-32"
+ "(slm-port-1 + slm-imul + slm-port-0), nothing*2")
+;;; imul has 4 cycles latency for DI operands with 1/2 tput
+(define_reservation "slm-imul-64"
+ "(slm-port-1 + slm-imul), slm-imul, nothing*2")
+
+;;; dual-execution instructions can have 1,2,4,5 cycles latency depending on
+;;; instruction form
+(define_reservation "slm-dual-1c" "(slm-port-dual + slm-all-eu)")
+(define_reservation "slm-dual-2c"
+ "(slm-port-dual + slm-all-eu, nothing)")
+
+;;; Most simple ALU instructions have 1 cycle latency. Some of them
+;;; issue in port 0, some in port 1 and some in either port.
+(define_reservation "slm-simple-0" "(slm-port-0 + slm-ieu-0)")
+(define_reservation "slm-simple-1" "(slm-port-1 + slm-ieu-1)")
+(define_reservation "slm-simple-either" "(slm-simple-0 | slm-simple-1)")
+
+;;; Complex macro-instructions have varying latency and use both ports.
+(define_reservation "slm-complex" "(slm-port-dual + slm-all-eu)")
+
+(define_insn_reservation "slm_other" 9
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "other")
+ (eq_attr "atom_unit" "!jeu")))
+ "slm-complex, slm-all-eu*8")
+
+;; return has type "other" with atom_unit "jeu"
+(define_insn_reservation "slm_other_2" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "other")
+ (eq_attr "atom_unit" "jeu")))
+ "slm-dual-1c")
+
+(define_insn_reservation "slm_multi" 9
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "multi"))
+ "slm-complex, slm-all-eu*8")
+
+;; Normal alu insns without carry
+(define_insn_reservation "slm_alu" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "none")
+ (eq_attr "use_carry" "0"))))
+ "slm-simple-either")
+
+;; Normal alu insns without carry, but use MEC.
+(define_insn_reservation "slm_alu_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "use_carry" "0"))))
+ "slm-simple-either")
+
+;; Alu insn consuming CF, such as adc/sbb
+(define_insn_reservation "slm_alu_carry" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "none")
+ (eq_attr "use_carry" "1"))))
+ "slm-simple-either, nothing")
+
+;; Alu insn consuming CF, such as adc/sbb, with a memory operand
+(define_insn_reservation "slm_alu_carry_mem" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "use_carry" "1"))))
+ "slm-simple-either, nothing")
+
+(define_insn_reservation "slm_alu1" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu1")
+ (eq_attr "memory" "none") (eq_attr "prefix_0f" "0")))
+ "slm-simple-either")
+
+;; bsf and bsr insn
+(define_insn_reservation "slm_alu1_1" 10
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu1")
+ (eq_attr "memory" "none") (eq_attr "prefix_0f" "1")))
+ "slm-simple-1, slm-ieu-1*9")
+
+(define_insn_reservation "slm_alu1_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu1")
+ (eq_attr "memory" "!none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_negnot" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "negnot")
+ (eq_attr "memory" "none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_negnot_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "negnot")
+ (eq_attr "memory" "!none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_imov" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_imov_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0")
+
+;; 16<-16, 32<-32
+(define_insn_reservation "slm_imovx" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "none")
+ (ior (and (match_operand:HI 0 "register_operand")
+ (match_operand:HI 1 "general_operand"))
+ (and (match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "general_operand"))))))
+ "slm-simple-either")
+
+;; 16<-16, 32<-32, mem
+(define_insn_reservation "slm_imovx_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "!none")
+ (ior (and (match_operand:HI 0 "register_operand")
+ (match_operand:HI 1 "general_operand"))
+ (and (match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "general_operand"))))))
+ "slm-simple-either")
+
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8
+(define_insn_reservation "slm_imovx_2" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "none")
+ (ior (match_operand:QI 0 "register_operand")
+ (ior (and (match_operand:SI 0 "register_operand")
+ (not (match_operand:SI 1 "general_operand")))
+ (match_operand:DI 0 "register_operand"))))))
+ "slm-simple-either")
+
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem
+(define_insn_reservation "slm_imovx_2_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "!none")
+ (ior (match_operand:QI 0 "register_operand")
+ (ior (and (match_operand:SI 0 "register_operand")
+ (not (match_operand:SI 1 "general_operand")))
+ (match_operand:DI 0 "register_operand"))))))
+ "slm-simple-0")
+
+;; 16<-8
+(define_insn_reservation "slm_imovx_3" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imovx")
+ (and (match_operand:HI 0 "register_operand")
+ (match_operand:QI 1 "general_operand"))))
+ "slm-simple-0, nothing*2")
+
+(define_insn_reservation "slm_lea" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "lea")
+ (eq_attr "mode" "!HI")))
+ "slm-simple-either")
+
+;; lea 16bit address is complex insn
+(define_insn_reservation "slm_lea_2" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "lea")
+ (eq_attr "mode" "HI")))
+ "slm-complex, slm-all-eu")
+
+(define_insn_reservation "slm_incdec" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "incdec")
+ (eq_attr "memory" "none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_incdec_mem" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "incdec")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0, nothing*2")
+
+;; simple shift instruction use SHIFT eu, none memory
+(define_insn_reservation "slm_ishift" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ishift")
+ (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))))
+ "slm-simple-0")
+
+;; simple shift instruction use SHIFT eu, memory
+(define_insn_reservation "slm_ishift_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ishift")
+ (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0"))))
+ "slm-simple-0")
+
+;; DF shift (prefixed with 0f) is complex insn with latency of 4 cycles
+(define_insn_reservation "slm_ishift_3" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ishift")
+ (eq_attr "prefix_0f" "1")))
+ "slm-complex, slm-all-eu*3")
+
+(define_insn_reservation "slm_ishift1" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ishift1")
+ (eq_attr "memory" "none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_ishift1_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ishift1")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_rotate" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "rotate")
+ (eq_attr "memory" "none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_rotate_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "rotate")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_rotate1" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "rotate1")
+ (eq_attr "memory" "none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_rotate1_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "rotate1")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_imul" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "memory" "none") (eq_attr "mode" "SI"))))
+ "slm-imul-32")
+
+(define_insn_reservation "slm_imul_mem" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "memory" "!none") (eq_attr "mode" "SI"))))
+ "slm-imul-mem-32")
+
+;; latency set to 4 as common 64x64 imul with 1/2 tput
+(define_insn_reservation "slm_imul_3" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imul")
+ (eq_attr "mode" "!SI")))
+ "slm-imul-64")
+
+(define_insn_reservation "slm_idiv" 33
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "idiv"))
+ "slm-complex, slm-all-eu*16, nothing*16")
+
+(define_insn_reservation "slm_icmp" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "icmp")
+ (eq_attr "memory" "none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_icmp_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "icmp")
+ (eq_attr "memory" "!none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_test" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "test")
+ (eq_attr "memory" "none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_test_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "test")
+ (eq_attr "memory" "!none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_ibr" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ibr")
+ (eq_attr "memory" "!load")))
+ "slm-simple-1")
+
+;; complex if jump target is from address
+(define_insn_reservation "slm_ibr_2" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ibr")
+ (eq_attr "memory" "load")))
+ "slm-complex, slm-all-eu")
+
+(define_insn_reservation "slm_setcc" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "setcc")
+ (eq_attr "memory" "!store")))
+ "slm-simple-either")
+
+;; 2 cycles complex if target is in memory
+(define_insn_reservation "slm_setcc_2" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "setcc")
+ (eq_attr "memory" "store")))
+ "slm-complex, slm-all-eu")
+
+(define_insn_reservation "slm_icmov" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "icmov")
+ (eq_attr "memory" "none")))
+ "slm-simple-either, nothing")
+
+(define_insn_reservation "slm_icmov_mem" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "icmov")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0, nothing")
+
+;; UCODE if segreg, ignored
+(define_insn_reservation "slm_push" 2
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "push"))
+ "slm-dual-2c")
+
+;; pop r64 is 1 cycle. UCODE if segreg, ignored
+(define_insn_reservation "slm_pop" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "pop")
+ (eq_attr "mode" "DI")))
+ "slm-dual-1c")
+
+;; pop non-r64 is 2 cycles. UCODE if segreg, ignored
+(define_insn_reservation "slm_pop_2" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "pop")
+ (eq_attr "mode" "!DI")))
+ "slm-dual-2c")
+
+;; UCODE if segreg, ignored
+(define_insn_reservation "slm_call" 1
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "call"))
+ "slm-dual-1c")
+
+(define_insn_reservation "slm_callv" 1
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "callv"))
+ "slm-dual-1c")
+
+(define_insn_reservation "slm_leave" 3
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "leave"))
+ "slm-complex, slm-all-eu*2")
+
+(define_insn_reservation "slm_str" 3
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "str"))
+ "slm-complex, slm-all-eu*2")
+
+(define_insn_reservation "slm_sselog" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sselog")
+ (eq_attr "memory" "none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_sselog_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sselog")
+ (eq_attr "memory" "!none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_sselog1" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sselog1")
+ (eq_attr "memory" "none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_sselog1_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sselog1")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0")
+
+;; not pmad, not psad
+(define_insn_reservation "slm_sseiadd" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "!simul")
+ (eq_attr "atom_unit" "!complex")))))
+ "slm-simple-either")
+
+;; pmad, psad and 64
+(define_insn_reservation "slm_sseiadd_2" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "simul" )
+ (eq_attr "mode" "DI")))))
+ "slm-fmul-4c")
+
+;; pmad, psad and 128
+(define_insn_reservation "slm_sseiadd_3" 5
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "simul" )
+ (eq_attr "mode" "TI")))))
+ "slm-fmul-5c")
+
+;; if paddq(64 bit op), phadd/phsub
+(define_insn_reservation "slm_sseiadd_4" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseiadd")
+ (ior (match_operand:V2DI 0 "register_operand")
+ (eq_attr "atom_unit" "complex"))))
+ "slm-fadd-4c")
+
+;; if immediate op.
+(define_insn_reservation "slm_sseishft" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseishft")
+ (and (eq_attr "atom_unit" "!sishuf")
+ (match_operand 2 "immediate_operand"))))
+ "slm-simple-either")
+
+;; if palignr or psrldq
+(define_insn_reservation "slm_sseishft_2" 1
+ (and (eq_attr "cpu" "slm")
+ (ior (eq_attr "type" "sseishft1")
+ (and (eq_attr "type" "sseishft")
+ (and (eq_attr "atom_unit" "sishuf")
+ (match_operand 2 "immediate_operand")))))
+ "slm-simple-0")
+
+;; if reg/mem op
+(define_insn_reservation "slm_sseishft_3" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseishft")
+ (not (match_operand 2 "immediate_operand"))))
+ "slm-complex, slm-all-eu")
+
+(define_insn_reservation "slm_sseimul" 5
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "sseimul"))
+ "slm-fmul-5c")
+
+;; rcpss or rsqrtss
+(define_insn_reservation "slm_sse" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF"))))
+ "slm-fmul-4c")
+
+;; movshdup, movsldup. Suggest to type sseishft
+(define_insn_reservation "slm_sse_2" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sse")
+ (eq_attr "atom_sse_attr" "movdup")))
+ "slm-simple-0")
+
+;; lfence
+(define_insn_reservation "slm_sse_3" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sse")
+ (eq_attr "atom_sse_attr" "lfence")))
+ "slm-simple-either")
+
+;; sfence,clflush,mfence, prefetch
+(define_insn_reservation "slm_sse_4" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sse")
+ (ior (eq_attr "atom_sse_attr" "fence")
+ (eq_attr "atom_sse_attr" "prefetch"))))
+ "slm-simple-0")
+
+;; rcpps, rsqrtps, sqrt, ldmxcsr
+(define_insn_reservation "slm_sse_5" 9
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sse")
+ (ior (ior (eq_attr "atom_sse_attr" "sqrt")
+ (eq_attr "atom_sse_attr" "mxcsr"))
+ (and (eq_attr "atom_sse_attr" "rcp")
+ (eq_attr "mode" "V4SF")))))
+ "slm-complex, slm-all-eu*7, nothing")
+
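+;; xmm->xmm
+;; NOTE(review): constraint strings ("xy", "r") inside attribute tests are
+;; documented as ignored, so this test and the reg->xmm / xmm->reg tests
+;; below may all match any register-to-register ssemov -- confirm.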
+(define_insn_reservation "slm_ssemov" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "xy")
+ (match_operand 1 "register_operand" "xy"))))
+ "slm-simple-either")
+
+;; reg->xmm
+(define_insn_reservation "slm_ssemov_2" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "xy")
+ (match_operand 1 "register_operand" "r"))))
+ "slm-simple-0")
+
+;; xmm->reg
+(define_insn_reservation "slm_ssemov_3" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "r")
+ (match_operand 1 "register_operand" "xy"))))
+ "slm-simple-0, nothing*2")
+
+;; mov mem
+(define_insn_reservation "slm_ssemov_4" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "movu" "0") (eq_attr "memory" "!none"))))
+ "slm-simple-0")
+
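+;; movu mem
+;; NOTE(review): the test below uses ior, so it also accepts any ssemov with
+;; a memory operand, overlapping slm_ssemov_4; confirm `and' was not intended.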
+(define_insn_reservation "slm_ssemov_5" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemov")
+ (ior (eq_attr "movu" "1") (eq_attr "memory" "!none"))))
+ "slm-simple-0, nothing")
+
+;; no memory simple
+(define_insn_reservation "slm_sseadd" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "!V2DF")
+ (eq_attr "atom_unit" "!complex")))))
+ "slm-fadd-3c")
+
+;; memory simple
+(define_insn_reservation "slm_sseadd_mem" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "!V2DF")
+ (eq_attr "atom_unit" "!complex")))))
+ "slm-fadd-3c")
+
+;; maxps, minps, *pd, hadd, hsub
+(define_insn_reservation "slm_sseadd_3" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseadd")
+ (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex"))))
+ "slm-fadd-4c")
+
+;; Except dppd/dpps
+(define_insn_reservation "slm_ssemul" 5
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "!SF")))
+ "slm-fmul-5c")
+
+;; Except dppd/dpps, 4 cycle if mulss
+(define_insn_reservation "slm_ssemul_2" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "SF")))
+ "slm-fmul-4c")
+
+(define_insn_reservation "slm_ssecmp" 1
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "ssecmp"))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_ssecomi" 1
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "ssecomi"))
+ "slm-simple-0")
+
+;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi
+(define_insn_reservation "slm_ssecvt" 5
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "register_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "register_operand")))))
+ "slm-fp-0, slm-feu-0, nothing*3")
+
+;; memory and cvtpi2ps, cvtps2pi, cvttps2pi
+(define_insn_reservation "slm_ssecvt_mem" 5
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "memory_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "memory_operand")))))
+"slm-fp-0, slm-feu-0, nothing*3")
+
+;; cvtpd2pi, cvtpi2pd
+(define_insn_reservation "slm_ssecvt_1" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:V2SI 1 "register_operand"))
+ (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V2DF 1 "register_operand")))))
+ "slm-fp-0, slm-feu-0")
+
+;; memory and cvtpd2pi, cvtpi2pd
+(define_insn_reservation "slm_ssecvt_1_mem" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:V2SI 1 "memory_operand"))
+ (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V2DF 1 "memory_operand")))))
+ "slm-fp-0, slm-feu-0")
+
+;; otherwise. 4 cycles average for cvtss2sd
+(define_insn_reservation "slm_ssecvt_3" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssecvt")
+ (not (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "nonimmediate_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "nonimmediate_operand"))))))
+ "slm-fp-0, nothing*3")
+
+;; cvtsi2sd with register or memory source
+(define_insn_reservation "slm_sseicvt" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseicvt")
+ (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:SI 1 "nonimmediate_operand"))))
+ "slm-fp-0")
+
+;; otherwise.  NOTE(review): cvtsd2si is quoted at 8 cycles average, but the
+;; latency modeled below is 4 -- confirm which is intended.
+(define_insn_reservation "slm_sseicvt_2" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseicvt")
+ (not (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:SI 1 "memory_operand")))))
+ "slm-fp-0, nothing*3")
+
+(define_insn_reservation "slm_ssediv" 13
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "ssediv"))
+ "slm-fp-0, slm-feu-0*10, nothing*2")
+
+;; simple for fmov
+(define_insn_reservation "slm_fmov" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "none")))
+ "slm-simple-either")
+
+;; simple for fmov
+(define_insn_reservation "slm_fmov_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "!none")))
+ "slm-simple-either")
+
+;; Define bypass here
+
+;; There will be 0 cycle stall from cmp/test to jcc
+
+;; There will be 1 cycle stall from flag producer to cmov and adc/sbb
+(define_bypass 2 "slm_icmp, slm_test, slm_alu, slm_alu_carry,
+ slm_alu1, slm_negnot, slm_incdec, slm_ishift,
+ slm_ishift1, slm_rotate, slm_rotate1"
+ "slm_icmov, slm_alu_carry")
+
+;; lea to shift source stall is 1 cycle
+(define_bypass 2 "slm_lea"
+ "slm_ishift, slm_ishift1, slm_rotate, slm_rotate1"
+ "!ix86_dep_by_shift_count")
+
+;; non-lea to shift count stall is 1 cycle
+(define_bypass 2 "slm_alu_carry,
+ slm_alu,slm_alu1,slm_negnot,slm_imov,slm_imovx,
+ slm_incdec,slm_ishift,slm_ishift1,slm_rotate,
+ slm_rotate1, slm_setcc, slm_icmov, slm_pop,
+ slm_alu_mem, slm_alu_carry_mem, slm_alu1_mem,
+ slm_imovx_mem, slm_imovx_2_mem,
+ slm_imov_mem, slm_icmov_mem, slm_fmov_mem"
+ "slm_ishift, slm_ishift1, slm_rotate, slm_rotate1,
+ slm_ishift_mem, slm_ishift1_mem,
+ slm_rotate_mem, slm_rotate1_mem"
+ "ix86_dep_by_shift_count")
VENDOR_MAX
};
+/* Any new types or subtypes have to be inserted at the end. */
+
enum processor_types
{
INTEL_ATOM = 1,
INTEL_COREI7,
AMDFAM10H,
AMDFAM15H,
+ INTEL_SLM,
CPU_TYPE_MAX
};