From 93bc64f47065b79a0794c720f8445caff8f0952b Mon Sep 17 00:00:00 2001 From: David Schleef Date: Sun, 8 Mar 2009 13:31:22 -0700 Subject: [PATCH] Add ARM backend. Almost works. --- orc/Makefile.am | 5 +- orc/arm.c | 308 +++++++++++++++++++++++++++ orc/arm.h | 91 ++++++++ orc/orc.c | 1 + orc/orcprogram-arm.c | 341 ++++++++++++++++++++++++++++++ orc/orcprogram-sse.c | 15 -- orc/orcprogram.c | 25 ++- orc/orcprogram.h | 7 +- orc/orcrules-arm.c | 585 +++++++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 1360 insertions(+), 18 deletions(-) create mode 100644 orc/arm.c create mode 100644 orc/arm.h create mode 100644 orc/orcprogram-arm.c create mode 100644 orc/orcrules-arm.c diff --git a/orc/Makefile.am b/orc/Makefile.am index cff11ed..7593a49 100644 --- a/orc/Makefile.am +++ b/orc/Makefile.am @@ -13,6 +13,7 @@ liborc_@ORC_MAJORMINOR@_la_SOURCES = \ orcexecutor.c \ orcrule.c \ orcprogram.c \ + orcprogram-arm.c \ orcprogram-c.c \ orcprogram-powerpc.c \ orcprogram-mmx.c \ @@ -21,9 +22,11 @@ liborc_@ORC_MAJORMINOR@_la_SOURCES = \ orcopcodes.c \ orcrules-mmx.c \ orcrules-sse.c \ + orcrules-arm.c \ orcdebug.c \ orccpu.c \ - x86.c + x86.c \ + arm.c liborc_@ORC_MAJORMINOR@_la_SOURCES += orcprogram-$(CODEMEM_BACKEND).c diff --git a/orc/arm.c b/orc/arm.c new file mode 100644 index 0000000..1428449 --- /dev/null +++ b/orc/arm.c @@ -0,0 +1,308 @@ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +#include +#include + +#include +#include +#include + + +const char * +arm_reg_name (int reg) +{ +#if 1 + static const char *gp_regs[] = { + "a1", "a2", "a3", "a4", + "v1", "v2", "v3", "v4", + "v5", "v6", "v7", "v8", + "ip", "sp", "lr", "pc" }; +#else + static const char *gp_regs[] = { + "r0", "r1", "r2", "r3", + "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", + "r12", "r13", "r14", "r15" }; +#endif + + if (reg < ORC_GP_REG_BASE || reg >= ORC_GP_REG_BASE+16) { + return "ERROR"; + } + + return gp_regs[reg&0xf]; +} + +void +arm_emit (OrcProgram *program, uint32_t insn) +{ + ORC_WRITE_UINT32_LE (program->codeptr, insn); + program->codeptr+=4; +} + +void +arm_emit_bx_lr (OrcProgram *program) +{ + printf(" bx lr\n"); + arm_emit (program, 0xe12fff1e); +} + +void +arm_emit_push (OrcProgram *program, int regs) +{ + int i; + int x = 0; + + printf(" push {"); + for(i=0;i<16;i++){ + if (regs & (1<labels[label] = program->codeptr; +} + +void +arm_add_fixup (OrcProgram *program, int label, int type) +{ + program->fixups[program->n_fixups].ptr = program->codeptr; + program->fixups[program->n_fixups].label = label; + program->fixups[program->n_fixups].type = type; + program->n_fixups++; +} + +void +arm_do_fixups (OrcProgram *program) +{ + int i; + for(i=0;in_fixups;i++){ + unsigned char *label = program->labels[program->fixups[i].label]; + unsigned char *ptr = program->fixups[i].ptr; + uint32_t code; + int diff; + + code = ORC_READ_UINT32_LE (ptr); + diff = ORC_READ_UINT32_LE (ptr) + ((label - ptr) >> 2); + ORC_WRITE_UINT32_LE(ptr, (code&0xff000000) | (diff&0x00ffffff)); + } + +} + +void +arm_emit_branch (OrcProgram *program, int cond, int label) +{ + static const char *cond_names[] = { + "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "", "" }; + uint32_t code; + + code = 0x0afffffe; + code |= (cond&0xf) << 28; + arm_add_fixup (program, label, 0); + arm_emit (program, code); + + printf(" b%s .L%d\n", cond_names[cond], label); +} + +void +arm_emit_loadimm (OrcProgram *program, int dest, int imm) +{ + uint32_t code; + int shift2; + + shift2 = 0; + while (imm && ((imm&3)==0)) { + imm >>= 2; + shift2++; + } + + code = 0xe3a00000; + code |= (dest&0xf) << 12; + code |= (((16-shift2)&0xf) << 8); + code |= (imm&0xff); + + printf(" mov %s, #0x%08x\n", arm_reg_name (dest), imm << (shift2*2)); + arm_emit (program, code); +} + +void +arm_emit_add (OrcProgram *program, int dest, int src1, int src2) +{ + uint32_t code; + + code = 0xe0800000; + code |= (src1&0xf) << 16; + code |= (dest&0xf) << 12; + code |= (src2&0xf) << 0; + + printf(" add %s, %s, %s\n", + arm_reg_name (dest), + arm_reg_name (src1), + arm_reg_name (src2)); + arm_emit (program, code); +} + +void +arm_emit_sub (OrcProgram *program, int dest, int src1, int src2) +{ + uint32_t code; + + code = 0xe0400000; + code |= (src1&0xf) << 16; + code |= (dest&0xf) << 12; + code |= (src2&0xf) << 0; + + printf(" sub %s, %s, %s\n", + arm_reg_name (dest), + arm_reg_name (src1), + arm_reg_name (src2)); + arm_emit (program, code); +} + +void +arm_emit_sub_imm (OrcProgram *program, int dest, int src1, int value) +{ + uint32_t code; + + code = 0xe2500000; + code |= (src1&0xf) << 16; + code |= (dest&0xf) << 12; + code |= (value) << 0; + + printf(" subs %s, %s, #%d\n", + arm_reg_name (dest), + arm_reg_name (src1), + value); + arm_emit (program, code); +} + +void +arm_emit_cmp_imm (OrcProgram *program, int src1, int value) +{ + uint32_t code; + + code = 0xe3500000; + code |= (src1&0xf) << 16; + code |= (value) << 0; + + printf(" cmp %s, #%d\n", + arm_reg_name (src1), + value); + arm_emit (program, code); +} + +void +arm_emit_load_reg (OrcProgram *program, int dest, int src1, int offset) +{ + uint32_t code; + + code = 0xe5900000; + code |= (src1&0xf) << 16; + code |= (dest&0xf) << 12; + code |= offset&0xfff; + + printf(" ldr %s, [%s, #%d]\n", + arm_reg_name (dest), + arm_reg_name (src1), offset); + arm_emit (program, code); +} + + + +void +arm_emit_dp_reg (OrcProgram *program, int cond, int opcode, int dest, + int src1, int src2) +{ + static const char *dp_insn_names[] = { + "and", "eor", "sub", "rsb", "add", "adc", "sbc", "rsc", + "tst", "teq", "cmp", "cmn", "orr", "mov", "bic", "mvn" + }; + static const int shift_expn[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 0, 1, 0, 1 + }; + uint32_t code; + int update = 0; + + code = cond << 28; + code |= opcode << 21; + code |= update << 20; /* update condition codes */ + if (opcode >= 8 && opcode < 12) { + code |= 1 << 20; + } + code |= (src1&0xf) << 16; + code |= (dest&0xf) << 12; + code |= (src2&0xf) << 0; + + if (shift_expn[opcode]) { + printf(" %s%s %s, %s\n", + dp_insn_names[opcode], + update ? "s" : "", + arm_reg_name (src1), + arm_reg_name (src2)); + } else { + printf(" %s%s %s, %s, %s\n", + dp_insn_names[opcode], + update ? "s" : "", + arm_reg_name (dest), + arm_reg_name (src1), + arm_reg_name (src2)); + } + arm_emit (program, code); +} + diff --git a/orc/arm.h b/orc/arm.h new file mode 100644 index 0000000..181bf51 --- /dev/null +++ b/orc/arm.h @@ -0,0 +1,91 @@ + +#ifndef _ORC_ARM_H_ +#define _ORC_ARM_H_ + +#include + +#define ARM_A1 (ORC_GP_REG_BASE+0) +#define ARM_A2 (ORC_GP_REG_BASE+1) +#define ARM_A3 (ORC_GP_REG_BASE+2) +#define ARM_A4 (ORC_GP_REG_BASE+3) +#define ARM_V1 (ORC_GP_REG_BASE+4) +#define ARM_V2 (ORC_GP_REG_BASE+5) +#define ARM_V3 (ORC_GP_REG_BASE+6) +#define ARM_V4 (ORC_GP_REG_BASE+7) +#define ARM_V5 (ORC_GP_REG_BASE+8) +#define ARM_V6 (ORC_GP_REG_BASE+9) +#define ARM_V7 (ORC_GP_REG_BASE+10) +#define ARM_V8 (ORC_GP_REG_BASE+11) +#define ARM_IP (ORC_GP_REG_BASE+12) +#define ARM_SP (ORC_GP_REG_BASE+13) +#define ARM_LR (ORC_GP_REG_BASE+14) +#define ARM_PC (ORC_GP_REG_BASE+15) + +#define ARM_SB (ORC_GP_REG_BASE+9) + +enum { + ARM_DP_AND = 0, + ARM_DP_EOR, + ARM_DP_SUB, + ARM_DP_RSB, + ARM_DP_ADD, + ARM_DP_ADC, + ARM_DP_SBC, + ARM_DP_RSC, + ARM_DP_TST, + ARM_DP_TEQ, + ARM_DP_CMP, + ARM_DP_CMN, + ARM_DP_ORR, + ARM_DP_MOV, + ARM_DP_BIC, + ARM_DP_MVN +}; + +enum { + ARM_COND_EQ = 0, + ARM_COND_NE, + ARM_COND_CS, + ARM_COND_CC, + ARM_COND_MI, + ARM_COND_PL, + ARM_COND_VS, + ARM_COND_VC, + ARM_COND_HI, + ARM_COND_LS, + ARM_COND_GE, + ARM_COND_LT, + ARM_COND_GT, + ARM_COND_LE, + ARM_COND_AL, +}; + +void arm_emit (OrcProgram *program, uint32_t insn); +void arm_emit_bx_lr (OrcProgram *program); +const char * arm_reg_name (int reg); +void arm_emit_loadimm (OrcProgram *program, int dest, int imm); + +void arm_emit_add (OrcProgram *program, int dest, int src1, int src2); +void arm_emit_sub (OrcProgram *program, int dest, int src1, int src2); +void arm_emit_sub_imm (OrcProgram *program, int dest, int src1, int value); +void arm_emit_cmp_imm (OrcProgram *program, int src1, int value); + +void arm_emit_label (OrcProgram *program, int label); +void arm_emit_push (OrcProgram *program, int regs); +void arm_emit_pop (OrcProgram *program, int regs); +void arm_emit_mov (OrcProgram *program, int dest, int src); +void arm_emit_branch (OrcProgram *program, int cond, int label); + +void arm_emit_dp_reg (OrcProgram *program, int cond, int opcode, int dest, + int src1, int src2); + +void arm_loadw (OrcProgram *program, int dest, int src1, int offset); +void arm_storew (OrcProgram *program, int dest, int offset, int src1); + +void arm_emit_load_reg (OrcProgram *program, int dest, int src1, int offset); + +void arm_do_fixups (OrcProgram *program); + + +#endif + diff --git a/orc/orc.c b/orc/orc.c index 19f71e6..f53d153 100644 --- a/orc/orc.c +++ b/orc/orc.c @@ -16,5 +16,6 @@ orc_init (void) orc_sse_init(); orc_powerpc_init(); orc_c_init(); + orc_arm_init(); } diff --git a/orc/orcprogram-arm.c b/orc/orcprogram-arm.c new file mode 100644 index 0000000..97f6aaa --- /dev/null +++ b/orc/orcprogram-arm.c @@ -0,0 +1,341 @@ + +#include "config.h" + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#define SIZE 65536 + +int arm_exec_ptr = ARM_V1; + +void arm_emit_loop (OrcProgram *program); + +void orc_program_arm_register_rules (void); + + +void orc_program_rewrite_vars (OrcProgram *program); +void orc_program_dump (OrcProgram *program); + +void +arm_emit_prologue (OrcProgram *program) +{ + unsigned int regs = 0; + int i; + + orc_program_append_code(program,".global _binary_dump_start\n"); + orc_program_append_code(program,"_binary_dump_start:\n"); + + for(i=0;i<16;i++){ + if (program->used_regs[ORC_GP_REG_BASE + i] && + program->save_regs[ORC_GP_REG_BASE + i]) { + regs |= (1<used_regs[X86_EDI]) { + arm_emit_push (program, 4, X86_EDI); + } + if (program->used_regs[X86_ESI]) { + arm_emit_push (program, 4, X86_ESI); + } + if (program->used_regs[X86_EBX]) { + arm_emit_push (program, 4, X86_EBX); + } +#endif +} + +void +arm_dump_insns (OrcProgram *program) +{ + + arm_emit_label (program, 0); + + arm_emit_add (program, ARM_A2, ARM_A3, ARM_A4); + arm_emit_sub (program, ARM_A2, ARM_A3, ARM_A4); + arm_emit_push (program, 0x06); + arm_emit_mov (program, ARM_A2, ARM_A3); + + arm_emit_branch (program, ARM_COND_LE, 0); + arm_emit_branch (program, ARM_COND_AL, 0); + + arm_emit_loadimm (program, ARM_A3, 0xa500); + arm_loadw (program, ARM_A3, ARM_A4, 0xa5); + arm_emit_load_reg (program, ARM_A3, ARM_A4, 0x5a5); +} + +void +arm_emit_epilogue (OrcProgram *program) +{ + int i; + unsigned int regs = 0; + + for(i=0;i<16;i++){ + if (program->used_regs[ORC_GP_REG_BASE + i] && + program->save_regs[ORC_GP_REG_BASE + i]) { + regs |= (1<valid_regs[i] = 1; + } + program->valid_regs[ARM_V1] = 0; + //program->valid_regs[ARM_SB] = 0; + program->valid_regs[ARM_IP] = 0; + program->valid_regs[ARM_SP] = 0; + program->valid_regs[ARM_LR] = 0; + program->valid_regs[ARM_PC] = 0; + for(i=4;i<11;i++) { + program->save_regs[ORC_GP_REG_BASE+i] = 1; + } + + for(i=0;ialloc_regs[i] = 0; + program->used_regs[i] = 0; + } + + program->loop_shift = 0; +} + +void +arm_load_constants (OrcProgram *program) +{ + int i; + for(i=0;in_vars;i++){ + switch (program->vars[i].vartype) { + case ORC_VAR_TYPE_CONST: + //arm_emit_loadiw (program, program->vars[i].alloc, + // (int)program->vars[i].value); + break; + case ORC_VAR_TYPE_PARAM: + //arm_emit_loadw (program, program->vars[i].alloc, + // (int)ORC_STRUCT_OFFSET(OrcExecutor, params[i]), arm_exec_ptr); + break; + case ORC_VAR_TYPE_SRC: + case ORC_VAR_TYPE_DEST: + arm_emit_load_reg (program, + program->vars[i].ptr_register, + arm_exec_ptr, ORC_STRUCT_OFFSET(OrcExecutor, arrays[i])); + break; + default: + break; + } + } +} + +void +arm_emit_load_src (OrcProgram *program, OrcVariable *var) +{ + int ptr_reg; + if (var->ptr_register == 0) { + int i; + i = var - program->vars; + //arm_emit_mov_memoffset_reg (program, arm_ptr_size, + // (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]), + // arm_exec_ptr, X86_ECX); + ptr_reg = ARM_PC; + } else { + ptr_reg = var->ptr_register; + } + switch (var->size << program->loop_shift) { + //case 1: + //arm_emit_mov_memoffset_reg (program, 1, 0, ptr_reg, X86_ECX); + //arm_emit_mov_reg_arm (program, X86_ECX, var->alloc); + break; + case 2: + arm_loadw (program, var->alloc, ptr_reg, 0); + //arm_emit_mov_memoffset_reg (program, 2, 0, ptr_reg, X86_ECX); + //arm_emit_mov_reg_arm (program, X86_ECX, var->alloc); + break; + //case 4: + //arm_emit_mov_memoffset_arm (program, 4, 0, ptr_reg, var->alloc); + break; + //case 8: + //arm_emit_mov_memoffset_arm (program, 8, 0, ptr_reg, var->alloc); + break; + //case 16: + //arm_emit_mov_memoffset_arm (program, 16, 0, ptr_reg, var->alloc); + break; + default: + printf("ERROR bad size %d\n", var->size << program->loop_shift); + } +} + +void +arm_emit_store_dest (OrcProgram *program, OrcVariable *var) +{ + int ptr_reg; + if (var->ptr_register == 0) { + //arm_emit_mov_memoffset_reg (program, arm_ptr_size, + // var->ptr_offset, arm_exec_ptr, X86_ECX); + ptr_reg = ARM_PC; + } else { + ptr_reg = var->ptr_register; + } + switch (var->size << program->loop_shift) { + case 1: + //arm_emit_mov_arm_reg (program, var->alloc, X86_ECX); + //arm_emit_mov_reg_memoffset (program, 1, X86_ECX, 0, ptr_reg); + break; + case 2: + arm_storew (program, ptr_reg, 0, var->alloc); + //arm_emit_mov_arm_reg (program, var->alloc, X86_ECX); + //arm_emit_mov_reg_memoffset (program, 2, X86_ECX, 0, ptr_reg); + break; + case 4: + //arm_emit_mov_arm_memoffset (program, 4, var->alloc, 0, ptr_reg, + // var->is_aligned, var->is_uncached); + break; + case 8: + //arm_emit_mov_arm_memoffset (program, 8, var->alloc, 0, ptr_reg, + // var->is_aligned, var->is_uncached); + break; + case 16: + //arm_emit_mov_arm_memoffset (program, 16, var->alloc, 0, ptr_reg, + // var->is_aligned, var->is_uncached); + break; + default: + printf("ERROR\n"); + } +} + +void +orc_program_arm_assemble (OrcProgram *program) +{ + int dest_var = orc_program_get_dest (program); + + program->vars[dest_var].is_aligned = FALSE; + + arm_emit_prologue (program); + + arm_emit_load_reg (program, ARM_IP, arm_exec_ptr, + (int)ORC_STRUCT_OFFSET(OrcExecutor,n)); + arm_load_constants (program); + + arm_emit_label (program, 1); + + arm_emit_cmp_imm (program, ARM_IP, 0); + arm_emit_branch (program, ARM_COND_EQ, 3); + + arm_emit_label (program, 2); + arm_emit_loop (program); + arm_emit_sub_imm (program, ARM_IP, ARM_IP, 1); + arm_emit_cmp_imm (program, ARM_IP, 0); + arm_emit_branch (program, ARM_COND_NE, 2); + arm_emit_label (program, 3); + + arm_emit_epilogue (program); + + arm_do_fixups (program); +} + +void +arm_emit_loop (OrcProgram *program) +{ + int j; + int k; + OrcInstruction *insn; + OrcOpcode *opcode; + OrcVariable *args[10]; + OrcRule *rule; + + for(j=0;jn_insns;j++){ + insn = program->insns + j; + opcode = insn->opcode; + + orc_program_append_code(program,"# %d: %s", j, insn->opcode->name); + + /* set up args */ + for(k=0;kn_src + opcode->n_dest;k++){ + args[k] = program->vars + insn->args[k]; + orc_program_append_code(program," %d", args[k]->alloc); + if (args[k]->is_chained) { + orc_program_append_code(program," (chained)"); + } + } + orc_program_append_code(program,"\n"); + + for(k=opcode->n_dest;kn_src + opcode->n_dest;k++){ + switch (args[k]->vartype) { + case ORC_VAR_TYPE_SRC: + arm_emit_load_src (program, args[k]); + break; + case ORC_VAR_TYPE_CONST: + break; + case ORC_VAR_TYPE_PARAM: + break; + case ORC_VAR_TYPE_TEMP: + break; + default: + break; + } + } + + rule = insn->rule; + if (rule && rule->emit) { + if (args[0]->alloc != args[1]->alloc) { + arm_emit_mov (program, args[1]->alloc, args[0]->alloc); + } + rule->emit (program, rule->emit_user, insn); + } else { + orc_program_append_code(program,"No rule for: %s\n", opcode->name); + } + + for(k=0;kn_dest;k++){ + switch (args[k]->vartype) { + case ORC_VAR_TYPE_DEST: + arm_emit_store_dest (program, args[k]); + break; + case ORC_VAR_TYPE_TEMP: + break; + default: + break; + } + } + } + + for(k=0;kn_vars;k++){ + if (program->vars[k].vartype == ORC_VAR_TYPE_SRC || + program->vars[k].vartype == ORC_VAR_TYPE_DEST) { + if (program->vars[k].ptr_register) { + //arm_emit_add_imm_reg (program, arm_ptr_size, + // program->vars[k].size << program->loop_shift, + // program->vars[k].ptr_register); + } else { + //arm_emit_add_imm_memoffset (program, arm_ptr_size, + // program->vars[k].size << program->loop_shift, + // (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[k]), + // arm_exec_ptr); + } + } + } +} + diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index 2af6a0d..dc2b7fe 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -252,21 +252,6 @@ sse_emit_store_dest (OrcProgram *program, OrcVariable *var) } } -int -orc_program_get_dest (OrcProgram *program) -{ - int k; - - for(k=0;kn_vars;k++){ - if (program->vars[k].vartype == ORC_VAR_TYPE_DEST) { - return k; - } - } - - ORC_ERROR("can't find dest"); - return -1; -} - void orc_program_sse_assemble (OrcProgram *program) { diff --git a/orc/orcprogram.c b/orc/orcprogram.c index c82a343..70b9a2a 100644 --- a/orc/orcprogram.c +++ b/orc/orcprogram.c @@ -7,6 +7,7 @@ #include #include +#include void orc_program_assign_rules (OrcProgram *program); void orc_program_global_reg_alloc (OrcProgram *program); @@ -14,7 +15,7 @@ void orc_program_rewrite_vars (OrcProgram *program); void orc_program_rewrite_vars2 (OrcProgram *program); void orc_program_do_regs (OrcProgram *program); -int _orc_default_target = ORC_TARGET_SSE; +int _orc_default_target = ORC_TARGET_ARM; OrcProgram * orc_program_new (void) @@ -197,6 +198,21 @@ orc_program_find_var_by_name (OrcProgram *program, const char *name) return -1; } +int +orc_program_get_dest (OrcProgram *program) +{ + int k; + + for(k=0;kn_vars;k++){ + if (program->vars[k].vartype == ORC_VAR_TYPE_DEST) { + return k; + } + } + + ORC_ERROR("can't find dest"); + return -1; +} + void orc_program_append_str (OrcProgram *program, const char *name, const char *arg1, const char *arg2, const char *arg3) @@ -242,6 +258,7 @@ orc_program_allocate_register (OrcProgram *program, int data_reg) if (data_reg) { offset = ORC_VEC_REG_BASE; +offset = ORC_GP_REG_BASE; } else { offset = ORC_GP_REG_BASE; } @@ -290,6 +307,9 @@ orc_program_compile (OrcProgram *program) case ORC_TARGET_MMX: orc_program_mmx_init (program); break; + case ORC_TARGET_ARM: + orc_program_arm_init (program); + break; default: break; } @@ -322,6 +342,9 @@ orc_program_compile (OrcProgram *program) case ORC_TARGET_SSE: orc_program_sse_assemble (program); break; + case ORC_TARGET_ARM: + orc_program_arm_assemble (program); + break; default: break; } diff --git a/orc/orcprogram.h b/orc/orcprogram.h index d27bffd..5a63635 100644 --- a/orc/orcprogram.h +++ b/orc/orcprogram.h @@ -56,7 +56,8 @@ enum { ORC_TARGET_C = 0, ORC_TARGET_ALTIVEC = 1, ORC_TARGET_MMX = 2, - ORC_TARGET_SSE = 3 + ORC_TARGET_SSE = 3, + ORC_TARGET_ARM = 4 }; typedef enum { @@ -218,6 +219,7 @@ void orc_program_append_ds_str (OrcProgram *p, const char *opcode, void orc_mmx_init (void); void orc_sse_init (void); +void orc_arm_init (void); void orc_powerpc_init (void); void orc_c_init (void); @@ -227,14 +229,17 @@ void orc_program_compile (OrcProgram *p); void orc_program_c_init (OrcProgram *p); void orc_program_mmx_init (OrcProgram *p); void orc_program_sse_init (OrcProgram *p); +void orc_program_arm_init (OrcProgram *p); void orc_program_powerpc_init (OrcProgram *p); void orc_program_mmx_assemble (OrcProgram *p); void orc_program_sse_assemble (OrcProgram *p); +void orc_program_arm_assemble (OrcProgram *p); void orc_program_assemble_powerpc (OrcProgram *p); void orc_program_assemble_c (OrcProgram *p); void orc_program_free (OrcProgram *program); int orc_program_find_var_by_name (OrcProgram *program, const char *name); +int orc_program_get_dest (OrcProgram *program); int orc_program_add_temporary (OrcProgram *program, int size, const char *name); int orc_program_dup_temporary (OrcProgram *program, int i, int j); diff --git a/orc/orcrules-arm.c b/orc/orcrules-arm.c new file mode 100644 index 0000000..f4075a3 --- /dev/null +++ b/orc/orcrules-arm.c @@ -0,0 +1,585 @@ + +#include "config.h" + +#include +#include +#include + +#include +#include + +#include +#include + + + +void +arm_loadw (OrcProgram *program, int dest, int src1, int offset) +{ + uint32_t code; + + code = 0xe1d000b0; + code |= (src1&0xf) << 16; + code |= (dest&0xf) << 12; + code |= (offset&0xf0) << 4; + code |= offset&0x0f; + + printf(" ldrh %s, [%s, #%d]\n", + arm_reg_name (dest), + arm_reg_name (src1), offset); + arm_emit (program, code); +} + +void +arm_storew (OrcProgram *program, int dest, int offset, int src1) +{ + uint32_t code; + + code = 0xe1c000b0; + code |= (dest&0xf) << 16; + code |= (src1&0xf) << 12; + code |= (offset&0xf0) << 4; + code |= offset&0x0f; + + printf(" strh %s, [%s, #%d]\n", + arm_reg_name (src1), + arm_reg_name (dest), offset); + arm_emit (program, code); +} + +static void +arm_rule_addw (OrcProgram *p, void *user, OrcInstruction *insn) +{ + uint32_t code; + + code = 0xe0800000; + code |= (p->vars[insn->args[1]].alloc&0xf) << 16; + code |= (p->vars[insn->args[0]].alloc&0xf) << 12; + code |= (p->vars[insn->args[2]].alloc&0xf) << 0; + + printf(" add %s, %s, %s\n", + arm_reg_name (p->vars[insn->args[0]].alloc), + arm_reg_name (p->vars[insn->args[1]].alloc), + arm_reg_name (p->vars[insn->args[2]].alloc)); + arm_emit (p, code); +} + +static void +arm_rule_subw (OrcProgram *p, void *user, OrcInstruction *insn) +{ + uint32_t code; + + code = 0xe0400000; + code |= (p->vars[insn->args[1]].alloc&0xf) << 16; + code |= (p->vars[insn->args[0]].alloc&0xf) << 12; + code |= (p->vars[insn->args[2]].alloc&0xf) << 0; + + printf(" sub %s, %s, %s\n", + arm_reg_name (p->vars[insn->args[0]].alloc), + arm_reg_name (p->vars[insn->args[1]].alloc), + arm_reg_name (p->vars[insn->args[2]].alloc)); + arm_emit (p, code); +} + +static void +arm_rule_mullw (OrcProgram *p, void *user, OrcInstruction *insn) +{ + uint32_t code; + + code = 0xe0000090; + code |= (p->vars[insn->args[0]].alloc&0xf) << 16; + code |= (p->vars[insn->args[1]].alloc&0xf) << 0; + code |= (p->vars[insn->args[2]].alloc&0xf) << 8; + + printf(" mul %s, %s, %s\n", + arm_reg_name (p->vars[insn->args[0]].alloc), + arm_reg_name (p->vars[insn->args[1]].alloc), + arm_reg_name (p->vars[insn->args[2]].alloc)); + arm_emit (p, code); +} + +static void +arm_rule_shrsw (OrcProgram *p, void *user, OrcInstruction *insn) +{ + uint32_t code; + + code = 0xe1a00050; + code |= (p->vars[insn->args[0]].alloc&0xf) << 12; + code |= (p->vars[insn->args[1]].alloc&0xf) << 0; + code |= (p->vars[insn->args[2]].alloc&0xf) << 8; + + printf(" asr %s, %s, %s\n", + arm_reg_name (p->vars[insn->args[0]].alloc), + arm_reg_name (p->vars[insn->args[1]].alloc), + arm_reg_name (p->vars[insn->args[2]].alloc)); + arm_emit (p, code); +} + + +#if 0 +void +arm_emit_loadiw (OrcProgram *p, int reg, int value) +{ + if (value == 0) { + printf(" pxor %%%s, %%%s\n", x86_get_regname_sse(reg), + x86_get_regname_sse(reg)); + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0xef; + x86_emit_modrm_reg (p, reg, reg); + } else if (value == -1) { + printf(" pcmpeqw %%%s, %%%s\n", x86_get_regname_sse(reg), + x86_get_regname_sse(reg)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x75; + x86_emit_modrm_reg (p, reg, reg); + + } else if (value == 1) { + printf(" pcmpeqw %%%s, %%%s\n", x86_get_regname_sse(reg), + x86_get_regname_sse(reg)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x75; + x86_emit_modrm_reg (p, reg, reg); + + printf(" psrlw $15, %%%s\n", x86_get_regname_sse(reg)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x71; + x86_emit_modrm_reg (p, reg, 2); + *p->codeptr++ = 15; + } else { + value &= 0xffff; + value |= (value<<16); + + x86_emit_mov_imm_reg (p, 4, value, X86_ECX); + + printf(" movd %%ecx, %%%s\n", x86_get_regname_sse(reg)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x6e; + x86_emit_modrm_reg (p, X86_ECX, reg); + + printf(" pshufd $0, %%%s, %%%s\n", x86_get_regname_sse(reg), + x86_get_regname_sse(reg)); + + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x70; + x86_emit_modrm_reg (p, reg, reg); + *p->codeptr++ = 0x00; + } +} + +void +arm_emit_loadw (OrcProgram *p, int reg, int offset, int reg1) +{ + printf(" movd %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1), + x86_get_regname_sse(reg)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x6e; + x86_emit_modrm_memoffset (p, reg, offset, reg1); + + printf(" pshuflw $0, %%%s, %%%s\n", x86_get_regname_sse(reg), + x86_get_regname_sse(reg)); + *p->codeptr++ = 0xf2; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x70; + x86_emit_modrm_reg (p, reg, reg); + *p->codeptr++ = 0x00; + + printf(" pshufd $0, %%%s, %%%s\n", x86_get_regname_sse(reg), + x86_get_regname_sse(reg)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x70; + x86_emit_modrm_reg (p, reg, reg); + *p->codeptr++ = 0x00; +} + +static void +arm_rule_copyx (OrcProgram *p, void *user, OrcInstruction *insn) +{ + printf(" movdqa %%%s, %%%s\n", + x86_get_regname_sse(p->vars[insn->args[1]].alloc), + x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + + *p->codeptr++ = 0x66; + x86_emit_rex (p, 0, p->vars[insn->args[1]].alloc, 0, + p->vars[insn->args[0]].alloc); + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x6f; + x86_emit_modrm_reg (p, p->vars[insn->args[1]].alloc, + p->vars[insn->args[0]].alloc); +} + +static void +arm_emit_66_rex_0f (OrcProgram *p, OrcInstruction *insn, int code, + const char *insn_name) +{ + printf(" %s %%%s, %%%s\n", insn_name, + x86_get_regname_sse(p->vars[insn->args[2]].alloc), + x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + if (code & 0xff00) { + *p->codeptr++ = code >> 8; + *p->codeptr++ = code & 0xff; + } else { + *p->codeptr++ = code; + } + x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc, + p->vars[insn->args[0]].alloc); +} + +#if 0 +static void +arm_rule_addw (OrcProgram *p, void *user, OrcInstruction *insn) +{ + arm_emit_66_rex_0f (p, insn, 0xfd, "paddw"); +} + +static void +arm_rule_subw (OrcProgram *p, void *user, OrcInstruction *insn) +{ + arm_emit_66_rex_0f (p, insn, 0xf9, "psubw"); +} + +static void +arm_rule_mullw (OrcProgram *p, void *user, OrcInstruction *insn) +{ + arm_emit_66_rex_0f (p, insn, 0xd5, "pmullw"); +} +#endif + +#define UNARY(opcode,insn_name,code) \ +static void \ +arm_rule_ ## opcode (OrcProgram *p, void *user, OrcInstruction *insn) \ +{ \ + arm_emit_66_rex_0f (p, insn, code, insn_name); \ +} + +#define BINARY(opcode,insn_name,code) \ +static void \ +arm_rule_ ## opcode (OrcProgram *p, void *user, OrcInstruction *insn) \ +{ \ + arm_emit_66_rex_0f (p, insn, code, insn_name); \ +} + + +UNARY(absb,"pabsb",0x381c) +BINARY(addb,"paddb",0xfc) +BINARY(addssb,"paddsb",0xec) +BINARY(addusb,"paddusb",0xdc) +BINARY(andb,"pand",0xdb) +BINARY(andnb,"pandn",0xdf) +BINARY(avgub,"pavgb",0xe0) +BINARY(cmpeqb,"pcmpeqb",0x74) +BINARY(cmpgtsb,"pcmpgtb",0x64) +BINARY(maxsb,"pmaxsb",0x383c) +BINARY(maxub,"pmaxub",0xde) +BINARY(minsb,"pminsb",0x3838) +BINARY(minub,"pminub",0xda) +//BINARY(mullb,"pmullb",0xd5) +//BINARY(mulhsb,"pmulhb",0xe5) +//BINARY(mulhub,"pmulhub",0xe4) +BINARY(orb,"por",0xeb) +UNARY(signb,"psignb",0x3808) +BINARY(subb,"psubb",0xf8) +BINARY(subssb,"psubsb",0xe8) +BINARY(subusb,"psubusb",0xd8) +BINARY(xorb,"pxor",0xef) + +UNARY(absw,"pabsw",0x381d) +BINARY(addw,"paddw",0xfd) +BINARY(addssw,"paddsw",0xed) +BINARY(addusw,"paddusw",0xdd) +BINARY(andw,"pand",0xdb) +BINARY(andnw,"pandn",0xdf) +BINARY(avguw,"pavgw",0xe3) +BINARY(cmpeqw,"pcmpeqw",0x75) +BINARY(cmpgtsw,"pcmpgtw",0x65) +BINARY(maxsw,"pmaxsw",0xee) +BINARY(maxuw,"pmaxuw",0x383e) +BINARY(minsw,"pminsw",0xea) +BINARY(minuw,"pminuw",0x383a) +BINARY(mullw,"pmullw",0xd5) +BINARY(mulhsw,"pmulhw",0xe5) +BINARY(mulhuw,"pmulhuw",0xe4) +BINARY(orw,"por",0xeb) +UNARY(signw,"psignw",0x3809) +BINARY(subw,"psubw",0xf9) +BINARY(subssw,"psubsw",0xe9) +BINARY(subusw,"psubusw",0xd9) +BINARY(xorw,"pxor",0xef) + +UNARY(absl,"pabsd",0x381e) +BINARY(addl,"paddd",0xfe) +//BINARY(addssl,"paddsd",0xed) +//BINARY(addusl,"paddusd",0xdd) +BINARY(andl,"pand",0xdb) +BINARY(andnl,"pandn",0xdf) +//BINARY(avgul,"pavgd",0xe3) +BINARY(cmpeql,"pcmpeqd",0x76) +BINARY(cmpgtsl,"pcmpgtd",0x66) +BINARY(maxsl,"pmaxsd",0x383d) +BINARY(maxul,"pmaxud",0x383f) +BINARY(minsl,"pminsd",0x3839) +BINARY(minul,"pminud",0x383b) +BINARY(mulll,"pmulld",0x3840) +BINARY(mulhsl,"pmulhd",0xe5) +BINARY(mulhul,"pmulhud",0xe4) +BINARY(orl,"por",0xeb) +UNARY(signl,"psignd",0x380a) +BINARY(subl,"psubd",0xfa) +//BINARY(subssl,"psubsd",0xe9) +//BINARY(subusl,"psubusd",0xd9) +BINARY(xorl,"pxor",0xef) + + +static void +arm_rule_shlw (OrcProgram *p, void *user, OrcInstruction *insn) +{ + if (p->vars[insn->args[2]].vartype == ORC_VAR_TYPE_CONST) { + printf(" psllw $%d, %%%s\n", + p->vars[insn->args[2]].value, + x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x71; + x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 6); + *p->codeptr++ = p->vars[insn->args[2]].value; + } else if (p->vars[insn->args[2]].vartype == ORC_VAR_TYPE_PARAM) { + /* FIXME this is a gross hack to reload the register with a + * 64-bit version of the parameter. */ + printf(" movd %d(%%%s), %%%s\n", + (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->args[2]]), + x86_get_regname_ptr(x86_exec_ptr), + x86_get_regname_sse(p->vars[insn->args[2]].alloc)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x6e; + x86_emit_modrm_memoffset (p, + p->vars[insn->args[2]].alloc, + (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->args[2]]), + x86_exec_ptr); + + printf(" psllw %%%s, %%%s\n", + x86_get_regname_sse(p->vars[insn->args[2]].alloc), + x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0xf1; + x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, + p->vars[insn->args[2]].alloc); + } else { + printf("ERROR\n"); + } +} + +static void +arm_rule_shrsw (OrcProgram *p, void *user, OrcInstruction *insn) +{ + if (p->vars[insn->args[2]].vartype == ORC_VAR_TYPE_CONST) { + printf(" psraw $%d, %%%s\n", + p->vars[insn->args[2]].value, + x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x71; + x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 4); + *p->codeptr++ = p->vars[insn->args[2]].value; + } else if (p->vars[insn->args[2]].vartype == ORC_VAR_TYPE_PARAM) { + /* FIXME this is a gross hack to reload the register with a + * 64-bit version of the parameter. */ + printf(" movd %d(%%%s), %%%s\n", + (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->args[2]]), + x86_get_regname_ptr(x86_exec_ptr), + x86_get_regname_sse(p->vars[insn->args[2]].alloc)); + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x6e; + x86_emit_modrm_memoffset (p, + p->vars[insn->args[2]].alloc, + (int)ORC_STRUCT_OFFSET(OrcExecutor, params[insn->args[2]]), + x86_exec_ptr); + + printf(" psraw %%%s, %%%s\n", + x86_get_regname_sse(p->vars[insn->args[2]].alloc), + x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0xe1; + x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc, + p->vars[insn->args[0]].alloc); + } else { + printf("ERROR\n"); + } +} + +static void +arm_rule_convsbw (OrcProgram *p, void *user, OrcInstruction *insn) +{ + printf(" punpcklbw %%%s, %%%s\n", + x86_get_regname_sse(p->vars[insn->args[1]].alloc), + x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x60; + x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, + p->vars[insn->args[1]].alloc); + + printf(" psraw $8, %%%s\n", + x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x71; + x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 4); + *p->codeptr++ = 8; +} + +static void +arm_rule_convubw (OrcProgram *p, void *user, OrcInstruction *insn) +{ + /* FIXME should do this by unpacking with a zero reg */ + + printf(" punpcklbw %%%s, %%%s\n", + x86_get_regname_sse(p->vars[insn->args[1]].alloc), + x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x60; + x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, + p->vars[insn->args[1]].alloc); + + printf(" psrlw $8, %%%s\n", + x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x71; + x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 2); + *p->codeptr++ = 8; + +} + +static void +arm_rule_convsuswb (OrcProgram *p, void *user, OrcInstruction *insn) +{ + printf(" packuswb %%%s, %%%s\n", + x86_get_regname_sse(p->vars[insn->args[1]].alloc), + x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + + *p->codeptr++ = 0x66; + *p->codeptr++ = 0x0f; + *p->codeptr++ = 0x67; + x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, + p->vars[insn->args[1]].alloc); +} +#endif + +void +orc_program_arm_register_rules (void) +{ +#if 0 +#define REG(x) \ + orc_rule_register ( #x , ORC_TARGET_SSE, arm_rule_ ## x, NULL) + + REG(absb); + REG(addb); + REG(addssb); + REG(addusb); + REG(andb); + REG(andnb); + //REG(avgsb); + REG(avgub); + REG(cmpeqb); + REG(cmpgtsb); + if (sse41) REG(maxsb); + REG(maxub); + if (sse41) REG(minsb); + REG(minub); + //REG(mullb); + //REG(mulhsb); + //REG(mulhub); + REG(orb); + REG(signb); + REG(subb); + REG(subssb); + REG(subusb); + REG(xorb); + + REG(absw); + REG(addw); + REG(addssw); + REG(addusw); + REG(andw); + REG(andnw); + //REG(avgsw); + REG(avguw); + REG(cmpeqw); + REG(cmpgtsw); + REG(maxsw); + if (sse41) REG(maxuw); + REG(minsw); + if (sse41) REG(minuw); + REG(mullw); + REG(mulhsw); + REG(mulhuw); + REG(orw); + REG(signw); + REG(subw); + REG(subssw); + REG(subusw); + REG(xorw); + + REG(absl); + REG(addl); + //REG(addssl); + //REG(addusl); + REG(andl); + REG(andnl); + //REG(avgsl); + //REG(avgul); + REG(cmpeql); + REG(cmpgtsl); + if (sse41) REG(maxsl); + if (sse41) REG(maxul); + if (sse41) REG(minsl); + if (sse41) REG(minul); + if (sse41) REG(mulll); + REG(mulhsl); + REG(mulhul); + REG(orl); + REG(signl); + REG(subl); + //REG(subssl); + //REG(subusl); + REG(xorl); + + orc_rule_register ("copyb", ORC_TARGET_SSE, arm_rule_copyx, NULL); + orc_rule_register ("copyw", ORC_TARGET_SSE, arm_rule_copyx, NULL); + orc_rule_register ("copyl", ORC_TARGET_SSE, arm_rule_copyx, NULL); + + orc_rule_register ("shlw", ORC_TARGET_SSE, arm_rule_shlw, NULL); + orc_rule_register ("shrsw", ORC_TARGET_SSE, arm_rule_shrsw, NULL); + + orc_rule_register ("convsbw", ORC_TARGET_SSE, arm_rule_convsbw, NULL); + orc_rule_register ("convubw", ORC_TARGET_SSE, arm_rule_convubw, NULL); + orc_rule_register ("convsuswb", ORC_TARGET_SSE, arm_rule_convsuswb, NULL); +#endif + orc_rule_register ("addw", ORC_TARGET_ARM, arm_rule_addw, NULL); + orc_rule_register ("subw", ORC_TARGET_ARM, arm_rule_subw, NULL); + orc_rule_register ("mullw", ORC_TARGET_ARM, arm_rule_mullw, NULL); + orc_rule_register ("shrsw", ORC_TARGET_ARM, arm_rule_shrsw, NULL); +} + -- 2.7.4