#include <orc/orcutils.h>
#include <orc/orcdebug.h>
+#undef MMX
#define SIZE 65536
#define ORC_SSE_ALIGNED_DEST_CUTOFF 64
#endif
#if defined(HAVE_I386)
+#ifndef MMX
if (!(orc_sse_get_cpu_flags () & ORC_TARGET_SSE_SSE2)) {
sse_target.executable = FALSE;
}
+#else
+ if (!(orc_mmx_get_cpu_flags () & ORC_TARGET_MMX_MMX)) {
+ mmx_target.executable = FALSE;
+ }
+#endif
#endif
orc_target_register (&sse_target);
#if defined(HAVE_AMD64) || defined(HAVE_I386)
flags |= orc_sse_get_cpu_flags ();
#else
+#ifndef MMX
flags |= ORC_TARGET_SSE_SSE2;
flags |= ORC_TARGET_SSE_SSE3;
flags |= ORC_TARGET_SSE_SSSE3;
+#else
+ flags |= ORC_TARGET_MMX_MMX;
+ flags |= ORC_TARGET_MMX_3DNOW;
+#endif
#endif
return flags;
sse_get_flag_name (int shift)
{
static const char *flags[] = {
+#ifndef MMX
"sse2", "sse3", "ssse3", "sse41", "sse42", "sse4a", "sse5",
"frame_pointer", "short_jumps", "64bit"
+#else
+ "mmx", "mmxext", "3dnow", "3dnowext", "ssse3", "sse41", "",
+ "frame_pointer", "short_jumps", "64bit"
+#endif
};
if (shift >= 0 && shift < sizeof(flags)/sizeof(flags[0])) {
}
compiler->valid_regs[X86_EDI] = 0;
compiler->valid_regs[X86_ESP] = 0;
+#ifndef MMX
for(i=X86_XMM0;i<X86_XMM0+16;i++){
compiler->valid_regs[i] = 1;
}
+#else
+ for(i=X86_XMM0;i<X86_XMM0+8;i++){
+ compiler->valid_regs[i] = 1;
+ }
+#endif
compiler->save_regs[X86_EBX] = 1;
compiler->save_regs[X86_EBP] = 1;
compiler->save_regs[X86_R12] = 1;
}
compiler->valid_regs[compiler->exec_reg] = 0;
- switch (orc_program_get_max_var_size (compiler->program)) {
- case 1:
- compiler->loop_shift = 4;
- break;
- case 2:
- compiler->loop_shift = 3;
- break;
- case 4:
- compiler->loop_shift = 2;
- break;
- case 8:
- compiler->loop_shift = 1;
- break;
- default:
- ORC_ERROR("unhandled max var size %d",
- orc_program_get_max_var_size (compiler->program));
- break;
- }
+  {
+    /* Shift used when the widest variable is one byte.  The switch removed
+     * by this change used 4 for that case; the MMX build uses one less
+     * (matching the "4 - size" expression for size 1).  Every doubling of
+     * the variable size lowers the shift by exactly one, so the value must
+     * follow log2(size): a plain "5 - size" / "4 - size" is only correct
+     * for sizes 1 and 2 and yields wrong (even negative) shifts for 4 and
+     * 8. */
+#ifndef MMX
+    const int byte_shift = 4;
+#else
+    const int byte_shift = 3;
+#endif
+    const int max_var_size = orc_program_get_max_var_size (compiler->program);
+
+    switch (max_var_size) {
+      case 1:
+        compiler->loop_shift = byte_shift;
+        break;
+      case 2:
+        compiler->loop_shift = byte_shift - 1;
+        break;
+      case 4:
+        compiler->loop_shift = byte_shift - 2;
+        break;
+      case 8:
+        compiler->loop_shift = byte_shift - 3;
+        break;
+      default:
+        /* Keep reporting unsupported sizes instead of silently computing
+         * a bogus shift. */
+        ORC_ERROR("unhandled max var size %d", max_var_size);
+        break;
+    }
+  }
compiler->unroll_shift = 1;
compiler->alloc_loop_counter = TRUE;
case ORC_VAR_TYPE_ACCUMULATOR:
src = compiler->vars[i].alloc;
+#ifndef MMX
orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(3,2,3,2), src, compiler->tmpreg);
+#else
+ orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(3,2,3,2), src, compiler->tmpreg);
+#endif
if (compiler->vars[i].size == 2) {
orc_sse_emit_660f (compiler, "paddw", 0xfd, compiler->tmpreg, src);
orc_sse_emit_660f (compiler, "paddd", 0xfe, compiler->tmpreg, src);
}
+#ifndef MMX
orc_sse_emit_pshufd (compiler, ORC_SSE_SHUF(1,1,1,1), src, compiler->tmpreg);
if (compiler->vars[i].size == 2) {
} else {
orc_sse_emit_660f (compiler, "paddd", 0xfe, compiler->tmpreg, src);
}
+#endif
if (compiler->vars[i].size == 2) {
+#ifndef MMX
orc_sse_emit_pshuflw (compiler, ORC_SSE_SHUF(1,1,1,1), src, compiler->tmpreg);
+#else
+ orc_mmx_emit_pshufw (compiler, ORC_MMX_SHUF(1,1,1,1), src, compiler->tmpreg);
+#endif
orc_sse_emit_660f (compiler, "paddw", 0xfd, compiler->tmpreg, src);
}
} else {
orc_x86_emit_mov_imm_reg (compiler, 4, value, compiler->gp_tmpreg);
orc_x86_emit_mov_reg_sse (compiler, compiler->gp_tmpreg, reg);
+#ifndef MMX
orc_sse_emit_pshufd (compiler, 0, reg, reg);
+#else
+ orc_mmx_emit_pshufw (compiler, 0, reg, reg);
+#endif
}
}
orc_x86_emit_label (compiler, 7);
}
+#ifndef MMX
static int
orc_program_has_float (OrcCompiler *compiler)
{
}
return FALSE;
}
+#endif
#define LABEL_REGION1_SKIP 1
#define LABEL_INNER_LOOP_START 2
void
orc_compiler_sse_assemble (OrcCompiler *compiler)
{
+#ifndef MMX
int set_mxcsr = FALSE;
+#endif
int align_var;
if (0 && orc_x86_assemble_copy_check (compiler)) {
orc_x86_emit_prologue (compiler);
+#ifndef MMX
if (orc_program_has_float (compiler)) {
set_mxcsr = TRUE;
orc_sse_set_mxcsr (compiler);
}
+#endif
sse_load_constants_outer (compiler);
sse_save_accumulators (compiler);
+#ifndef MMX
if (set_mxcsr) {
orc_sse_restore_mxcsr (compiler);
}
+#endif
orc_x86_emit_epilogue (compiler);
orc_x86_do_fixups (compiler);
#include <orc/orcdebug.h>
#include <orc/orcsse.h>
+#undef MMX
#define SIZE 65536
/* sse rules */
orc_sse_emit_punpcklbw (compiler, reg, reg);
}
if (src->size <= 2) {
+#ifdef MMX
+ orc_mmx_emit_pshufw (compiler, 0, reg, reg);
+#else
orc_sse_emit_pshuflw (compiler, 0, reg, reg);
+#endif
}
+#ifndef MMX
orc_sse_emit_pshufd (compiler, 0, reg, reg);
+#endif
} else if (src->vartype == ORC_VAR_TYPE_CONST) {
int value = src->value;
} else {
orc_x86_emit_mov_imm_reg (compiler, 4, value, compiler->gp_tmpreg);
orc_x86_emit_mov_reg_sse (compiler, compiler->gp_tmpreg, reg);
+#ifndef MMX
orc_sse_emit_pshufd (compiler, 0, reg, reg);
+#endif
}
}
}
int src = p->vars[insn->src_args[0]].alloc;
int dest = p->vars[insn->dest_args[0]].alloc;
+#ifndef MMX
if (p->loop_shift == 0) {
orc_sse_emit_pslldq (p, 12, src);
}
+#endif
orc_sse_emit_paddd (p, src, dest);
}
int src2 = p->vars[insn->src_args[1]].alloc;
int dest = p->vars[insn->dest_args[0]].alloc;
int tmp = p->tmpreg;
+#ifndef MMX
int tmp2 = X86_XMM7;
if (p->loop_shift == 0) {
orc_sse_emit_movdqa (p, src1, tmp);
orc_sse_emit_psadbw (p, src2, tmp);
}
+#else
+ orc_sse_emit_movdqa (p, src1, tmp);
+ orc_sse_emit_psadbw (p, src2, tmp);
+#endif
orc_sse_emit_paddd (p, tmp, dest);
}
int src = p->vars[insn->src_args[0]].alloc;
int dest = p->vars[insn->dest_args[0]].alloc;
int tmp = p->tmpreg;
- int gptmp = p->gp_tmpreg;
if (src != dest) {
orc_sse_emit_movdqa (p, src, dest);
}
- orc_x86_emit_mov_imm_reg (p, 4, 0x00010001, gptmp);
- orc_x86_emit_mov_reg_sse (p, gptmp, tmp);
- orc_sse_emit_pshufd (p, 0, tmp, tmp);
+ tmp = orc_compiler_get_constant (p, 2, 0x0001);
orc_sse_emit_pminsw (p, tmp, dest);
- orc_x86_emit_mov_imm_reg (p, 4, 0xffffffff, gptmp);
- orc_x86_emit_mov_reg_sse (p, gptmp, tmp);
- orc_sse_emit_pshufd (p, 0, tmp, tmp);
+ tmp = orc_compiler_get_constant (p, 2, 0xffff);
orc_sse_emit_pmaxsw (p, tmp, dest);
}
int src = p->vars[insn->src_args[1]].alloc;
int dest = p->vars[insn->dest_args[0]].alloc;
int tmp = p->tmpreg;
- int gptmp = p->gp_tmpreg;
- orc_x86_emit_mov_imm_reg (p, 4, 0x80008000, gptmp);
- orc_x86_emit_mov_reg_sse (p, gptmp, tmp);
- orc_sse_emit_pshufd (p, 0, tmp, tmp);
+ tmp = orc_compiler_get_constant (p, 2, 0x8000);
orc_sse_emit_pxor(p, tmp, src);
orc_sse_emit_pxor(p, tmp, dest);
orc_sse_emit_pmaxsw (p, src, dest);
orc_sse_emit_psubd(p, tmp, dest);
}
+#ifndef MMX
/* float ops */
#define UNARY_F(opcode,insn_name,code) \
p->vars[insn->src_args[0]].alloc,
p->vars[insn->dest_args[0]].alloc);
}
+#endif
void
orc_compiler_sse_register_rules (OrcTarget *target)
orc_rule_register (rule_set, "accl", sse_rule_accl, NULL);
orc_rule_register (rule_set, "accsadubl", sse_rule_accsadubl, NULL);
+#ifndef MMX
orc_rule_register (rule_set, "addf", sse_rule_addf, NULL);
orc_rule_register (rule_set, "subf", sse_rule_subf, NULL);
orc_rule_register (rule_set, "mulf", sse_rule_mulf, NULL);
orc_rule_register (rule_set, "cmplef", sse_rule_cmplef, NULL);
orc_rule_register (rule_set, "convfl", sse_rule_convfl, NULL);
orc_rule_register (rule_set, "convlf", sse_rule_convlf, NULL);
+#endif
/* slow rules */
orc_rule_register (rule_set, "maxuw", sse_rule_maxuw_slow, NULL);