#include <orc/orcprogram.h>
#include <orc/x86.h>
#include <orc/orcutils.h>
+#include <orc/orcdebug.h>
#define SIZE 65536
x86_emit_mov_reg_memoffset (program, 2, X86_ECX, 0, ptr_reg);
break;
case 4:
- x86_emit_mov_sse_memoffset (program, 4, var->alloc, 0, ptr_reg);
+ x86_emit_mov_sse_memoffset (program, 4, var->alloc, 0, ptr_reg,
+ var->is_aligned);
break;
case 8:
- x86_emit_mov_sse_memoffset (program, 8, var->alloc, 0, ptr_reg);
+ x86_emit_mov_sse_memoffset (program, 8, var->alloc, 0, ptr_reg,
+ var->is_aligned);
break;
case 16:
- x86_emit_mov_sse_memoffset (program, 16, var->alloc, 0, ptr_reg);
+ x86_emit_mov_sse_memoffset (program, 16, var->alloc, 0, ptr_reg,
+ var->is_aligned);
break;
default:
printf("ERROR\n");
}
}
+int
+orc_program_get_dest (OrcProgram *program)
+{ /* Return the index of the first entry in program->vars whose vartype is
+ * ORC_VAR_TYPE_DEST.  Only the first program->n_vars entries are examined.
+ * When no such entry exists, an error is logged with ORC_ERROR and -1 is
+ * returned. */
+ int k;
+
+ for(k=0;k<program->n_vars;k++){ /* linear scan; the first match wins */
+ if (program->vars[k].vartype == ORC_VAR_TYPE_DEST) {
+ return k;
+ }
+ }
+
+ ORC_ERROR("can't find dest");
+ return -1; /* NOTE(review): orc_program_sse_assemble uses this result as an index into program->vars without testing for -1 */
+}
+
void
orc_program_sse_assemble (OrcProgram *program)
{
+ int dest_var = orc_program_get_dest (program);
+
+ program->vars[dest_var].is_aligned = FALSE;
+
sse_emit_prologue (program);
- x86_emit_mov_memoffset_reg (program, 4, (int)ORC_STRUCT_OFFSET(OrcExecutor,n),
- x86_exec_ptr, X86_ECX);
- x86_emit_sar_imm_reg (program, 4, program->loop_shift, X86_ECX);
- x86_emit_mov_reg_memoffset (program, 4, X86_ECX,
- (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr);
+ if (program->loop_shift > 0) {
+
+ x86_emit_mov_imm_reg (program, 4, 16, X86_EAX);
+ x86_emit_sub_memoffset_reg (program, 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[dest_var]),
+ x86_exec_ptr, X86_EAX);
+ x86_emit_and_imm_reg (program, 4, 15, X86_EAX);
+ /* FIXME size shift */
+ x86_emit_sar_imm_reg (program, 4, 1, X86_EAX);
+
+ x86_emit_cmp_reg_memoffset (program, 4, X86_EAX,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,n), x86_exec_ptr);
+
+ x86_emit_jle (program, 6);
+
+ x86_emit_mov_reg_memoffset (program, 4, X86_EAX,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), x86_exec_ptr);
+
+ x86_emit_mov_memoffset_reg (program, 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,n), x86_exec_ptr, X86_ECX);
+ x86_emit_sub_reg_reg (program, 4, X86_EAX, X86_ECX);
+
+ x86_emit_mov_reg_reg (program, 4, X86_ECX, X86_EAX);
+
+ x86_emit_sar_imm_reg (program, 4, program->loop_shift, X86_ECX);
+ x86_emit_mov_reg_memoffset (program, 4, X86_ECX,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr);
+
+ x86_emit_and_imm_reg (program, 4, (1<<program->loop_shift)-1, X86_EAX);
+ x86_emit_mov_reg_memoffset (program, 4, X86_EAX,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), x86_exec_ptr);
+
+ x86_emit_jmp (program, 7);
+ x86_emit_label (program, 6);
- x86_emit_mov_memoffset_reg (program, 4,
- (int)ORC_STRUCT_OFFSET(OrcExecutor,n), x86_exec_ptr, X86_ECX);
- x86_emit_and_imm_reg (program, 4, (1<<program->loop_shift)-1, X86_ECX);
- x86_emit_mov_reg_memoffset (program, 4, X86_ECX,
- (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), x86_exec_ptr);
+ x86_emit_mov_memoffset_reg (program, 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,n), x86_exec_ptr, X86_EAX);
+ x86_emit_mov_reg_memoffset (program, 4, X86_EAX,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), x86_exec_ptr);
+ x86_emit_mov_imm_reg (program, 4, 0, X86_EAX);
+ x86_emit_mov_reg_memoffset (program, 4, X86_EAX,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr);
+ x86_emit_mov_reg_memoffset (program, 4, X86_EAX,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), x86_exec_ptr);
+
+ x86_emit_label (program, 7);
+ } else {
+ x86_emit_mov_memoffset_reg (program, 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,n), x86_exec_ptr, X86_ECX);
+ x86_emit_mov_reg_memoffset (program, 4, X86_ECX,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr);
+ }
sse_load_constants (program);
x86_emit_jne (program, 0);
program->loop_shift = save_loop_shift;
+ program->vars[dest_var].is_aligned = TRUE;
}
- x86_emit_align (program);
x86_emit_label (program, 1);
x86_emit_cmp_imm_memoffset (program, 4, 0,
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr);
x86_emit_je (program, 3);
+ x86_emit_align (program);
x86_emit_label (program, 2);
sse_emit_loop (program);
x86_emit_dec_memoffset (program, 4,
x86_emit_jne (program, 2);
x86_emit_label (program, 3);
+ if (program->loop_shift > 0) {
+ int save_loop_shift;
+
+ program->vars[dest_var].is_aligned = FALSE;
+ x86_emit_cmp_imm_memoffset (program, 4, 0,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), x86_exec_ptr);
+ x86_emit_je (program, 5);
+
+ save_loop_shift = program->loop_shift;
+ program->loop_shift = 0;
+
+ x86_emit_label (program, 4);
+ sse_emit_loop (program);
+ x86_emit_dec_memoffset (program, 4,
+ (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3),
+ x86_exec_ptr);
+ x86_emit_jne (program, 4);
+
+ x86_emit_label (program, 5);
+
+ program->loop_shift = save_loop_shift;
+ }
+
sse_emit_epilogue (program);
sse_do_fixups (program);
int alloc;
int is_chained;
+ int is_aligned;
+ int is_uncached;
int value;
void
x86_emit_mov_sse_memoffset (OrcProgram *program, int size, int reg1, int offset,
- int reg2)
+ int reg2, int aligned)
{
switch (size) {
case 4:
*program->codeptr++ = 0xd6;
break;
case 16:
- printf(" movdqu %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset,
- x86_get_regname_ptr(reg2));
- *program->codeptr++ = 0xf3;
- *program->codeptr++ = 0x0f;
- *program->codeptr++ = 0x7f;
+ if (aligned) {
+ printf(" movdqa %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset,
+ x86_get_regname_ptr(reg2));
+ *program->codeptr++ = 0x66;
+ *program->codeptr++ = 0x0f;
+ *program->codeptr++ = 0x7f;
+ } else {
+ printf(" movdqu %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset,
+ x86_get_regname_ptr(reg2));
+ *program->codeptr++ = 0xf3;
+ *program->codeptr++ = 0x0f;
+ *program->codeptr++ = 0x7f;
+ }
break;
default:
printf("ERROR\n");
}
void
+x86_emit_sub_reg_reg (OrcProgram *program, int size, int reg1, int reg2)
+{ /* Emit a register-to-register subtract.  The listing line is printed to
+ * stdout in AT&T operand order, "sub reg1, reg2", i.e. reg2 is the
+ * destination (reg2 -= reg1).  Machine code is written through
+ * program->codeptr.
+ * size selects the printed form: 2 gives subw, 4 gives subl, and any other
+ * value uses the x86_get_regname_64 register names. */
+ if (size == 2) {
+ printf(" subw %%%s, %%%s\n", x86_get_regname_16(reg1),
+ x86_get_regname_16(reg2));
+ *program->codeptr++ = 0x66; /* operand-size prefix, written before any REX byte */
+ } else if (size == 4) {
+ printf(" subl %%%s, %%%s\n", x86_get_regname(reg1),
+ x86_get_regname(reg2));
+ } else {
+ printf(" sub %%%s, %%%s\n", x86_get_regname_64(reg1),
+ x86_get_regname_64(reg2));
+ }
+
+ x86_emit_rex(program, size, reg2, 0, reg1);
+ *program->codeptr++ = 0x29; /* SUB r/m, r */
+ x86_emit_modrm_reg (program, reg2, reg1);
+}
+
+void
+x86_emit_sub_memoffset_reg (OrcProgram *program, int size,
+ int offset, int reg, int destreg)
+{ /* Emit a subtract of a memory operand from a register.  The listing line
+ * is printed to stdout as "sub offset(reg), destreg" (AT&T order), i.e.
+ * destreg -= the value stored at offset(reg).  Machine code is written
+ * through program->codeptr.
+ * size selects the printed form: 2 gives subw, 4 gives subl, and any other
+ * value prints destreg with its x86_get_regname_64 name. */
+ if (size == 2) {
+ printf(" subw %d(%%%s), %%%s\n", offset,
+ x86_get_regname_ptr(reg),
+ x86_get_regname_16(destreg));
+ *program->codeptr++ = 0x66; /* operand-size prefix, written before any REX byte */
+ } else if (size == 4) {
+ printf(" subl %d(%%%s), %%%s\n", offset,
+ x86_get_regname_ptr(reg),
+ x86_get_regname(destreg));
+ } else {
+ printf(" sub %d(%%%s), %%%s\n", offset,
+ x86_get_regname_ptr(reg),
+ x86_get_regname_64(destreg));
+ }
+
+ x86_emit_rex(program, size, 0, 0, reg); /* NOTE(review): destreg is not passed here, whereas x86_emit_sub_reg_reg passes both of its registers - confirm this is intended */
+ *program->codeptr++ = 0x2b; /* SUB r, r/m */
+ x86_emit_modrm_memoffset (program, destreg, offset, reg);
+}
+
+void
+x86_emit_cmp_reg_memoffset (OrcProgram *program, int size, int reg1,
+ int offset, int reg)
+{ /* Emit a compare of a register against a memory operand.  The listing
+ * line is printed to stdout as "cmp reg1, offset(reg)" (AT&T order), so the
+ * flags reflect (value at offset(reg)) - reg1.  Machine code is written
+ * through program->codeptr.
+ * size selects the printed form: 2 gives cmpw, 4 gives cmpl, and any other
+ * value prints reg1 with its x86_get_regname_64 name. */
+ if (size == 2) {
+ printf(" cmpw %%%s, %d(%%%s)\n", x86_get_regname_16(reg1), offset,
+ x86_get_regname_ptr(reg));
+ *program->codeptr++ = 0x66; /* operand-size prefix, written before any REX byte */
+ } else if (size == 4) {
+ printf(" cmpl %%%s, %d(%%%s)\n", x86_get_regname(reg1), offset,
+ x86_get_regname_ptr(reg));
+ } else {
+ printf(" cmp %%%s, %d(%%%s)\n", x86_get_regname_64(reg1), offset,
+ x86_get_regname_ptr(reg));
+ }
+
+ x86_emit_rex(program, size, 0, 0, reg); /* NOTE(review): reg1 is not passed here, whereas x86_emit_sub_reg_reg passes both of its registers - confirm this is intended */
+ *program->codeptr++ = 0x39; /* CMP r/m, r */
+ x86_emit_modrm_memoffset (program, reg1, offset, reg);
+}
+
+void
x86_emit_cmp_imm_memoffset (OrcProgram *program, int size, int value,
int offset, int reg)
{
}
void
-x86_add_fixup (OrcProgram *program, unsigned char *ptr, int label)
+x86_add_fixup (OrcProgram *program, unsigned char *ptr, int label, int type)
{ /* Append one fixup record (patch location ptr, target label, type) to program->fixups and bump program->n_fixups. */
program->fixups[program->n_fixups].ptr = ptr;
program->fixups[program->n_fixups].label = label;
- program->fixups[program->n_fixups].type = 1;
+ program->fixups[program->n_fixups].type = type; /* visible callers pass 0 for x86_emit_jmp / x86_emit_jle and 1 for the 0x0f 0x84 / 0x0f 0x85 jumps; presumably this selects the displacement width - confirm in the fixup pass */
program->n_fixups++; /* NOTE(review): no check against the capacity of program->fixups is visible here */
}
program->labels[label] = ptr;
}
+void x86_emit_jmp (OrcProgram *program, int label)
+{ /* Emit an unconditional jump to label.  Prints the listing line to
+ * stdout, writes opcode 0xeb, registers a type-0 fixup at the byte that
+ * follows the opcode, then writes 0xff into that byte as a placeholder. */
+ printf(" jmp .L%d\n", label);
+
+ *program->codeptr++ = 0xeb; /* JMP rel8 */
+ x86_add_fixup (program, program->codeptr, label, 0); /* codeptr now points at the displacement byte */
+ *program->codeptr++ = 0xff; /* NOTE(review): presumably rewritten by the fixup pass; a rel8 displacement only reaches -128..+127 bytes - confirm targets stay in range */
+}
+
+void x86_emit_jle (OrcProgram *program, int label)
+{ /* Emit a jump-if-less-or-equal to label.  Prints the listing line to
+ * stdout, writes opcode 0x7e, registers a type-0 fixup at the byte that
+ * follows the opcode, then writes 0xff into that byte as a placeholder. */
+ printf(" jle .L%d\n", label);
+
+ *program->codeptr++ = 0x7e; /* JLE rel8 */
+ x86_add_fixup (program, program->codeptr, label, 0); /* codeptr now points at the displacement byte */
+ *program->codeptr++ = 0xff; /* NOTE(review): presumably rewritten by the fixup pass; a rel8 displacement only reaches -128..+127 bytes - confirm targets stay in range */
+}
+
void x86_emit_je (OrcProgram *program, int label)
{
printf(" je .L%d\n", label);
*program->codeptr++ = 0x0f;
*program->codeptr++ = 0x84;
- x86_add_fixup (program, program->codeptr, label);
+ x86_add_fixup (program, program->codeptr, label, 1);
*program->codeptr++ = 0xfc;
*program->codeptr++ = 0xff;
*program->codeptr++ = 0xff;
printf(" jne .L%d\n", label);
*program->codeptr++ = 0x0f;
*program->codeptr++ = 0x85;
- x86_add_fixup (program, program->codeptr, label);
+ x86_add_fixup (program, program->codeptr, label, 1);
*program->codeptr++ = 0xfc;
*program->codeptr++ = 0xff;
*program->codeptr++ = 0xff;
void x86_emit_mov_mmx_memoffset (OrcProgram *program, int size, int reg1, int offset,
int reg2);
void x86_emit_mov_sse_memoffset (OrcProgram *program, int size, int reg1, int offset,
- int reg2);
+ int reg2, int aligned);
void x86_emit_mov_imm_reg (OrcProgram *program, int size, int value, int reg1);
void x86_emit_mov_reg_reg (OrcProgram *program, int size, int reg1, int reg2);
void x86_emit_mov_sse_reg_reg (OrcProgram *program, int reg1, int reg2);
void x86_emit_and_imm_memoffset (OrcProgram *program, int size, int value, int offset, int reg);
void x86_emit_add_imm_reg (OrcProgram *program, int size, int value, int reg);
void x86_emit_and_imm_reg (OrcProgram *program, int size, int value, int reg);
+void x86_emit_sub_reg_reg (OrcProgram *program, int size, int reg1, int reg2); /* emits reg2 -= reg1 */
+void x86_emit_sub_memoffset_reg (OrcProgram *program, int size,
+ int offset, int reg, int destreg); /* emits destreg -= value at offset(reg) */
+void x86_emit_cmp_reg_memoffset (OrcProgram *program, int size, int reg1,
+ int offset, int reg); /* emits a compare of reg1 against the value at offset(reg) */
void x86_emit_cmp_imm_memoffset (OrcProgram *program, int size, int value,
int offset, int reg);
void x86_emit_emms (OrcProgram *program);
void x86_emit_ret (OrcProgram *program);
+void x86_emit_jle (OrcProgram *program, int label); /* opcode 0x7e followed by a one-byte displacement */
void x86_emit_je (OrcProgram *program, int label);
void x86_emit_jne (OrcProgram *program, int label);
+void x86_emit_jmp (OrcProgram *program, int label); /* opcode 0xeb followed by a one-byte displacement */
void x86_emit_label (OrcProgram *program, int label);
void x86_emit_align (OrcProgram *program);